|
| 1 | +# Copyright (c) Meta Platforms, Inc. and affiliates. |
| 2 | +# All rights reserved. |
| 3 | +# |
| 4 | +# This source code is licensed under the license found in the |
| 5 | +# LICENSE file in the root directory of this source tree. |
| 6 | + |
# Policy defaults must be established before project() is called.
cmake_minimum_required(VERSION 3.19)
include(CMakeDependentOption)

project(torchao)

# Build as C++17 and fail at configure time if the compiler cannot provide it,
# rather than silently decaying to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Default to a Release build when none was requested. Multi-config generators
# (Visual Studio, Xcode, Ninja Multi-Config) choose the configuration at build
# time and ignore CMAKE_BUILD_TYPE, so the default only applies to
# single-config generators.
get_property(_torchao_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT _torchao_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()
| 17 | + |
# Platform options
# Which op libraries to build.
option(TORCHAO_BUILD_ATEN_OPS "Building torchao ops for ATen." ON)
option(TORCHAO_BUILD_EXECUTORCH_OPS "Building torchao ops for ExecuTorch." OFF)
# Optional aarch64 kernels and the ISA extensions / libraries they may use.
option(TORCHAO_BUILD_CPU_AARCH64 "Build torchao's CPU aarch64 kernels" OFF)
option(TORCHAO_BUILD_KLEIDIAI "Download, build, and link against Arm KleidiAI library (arm64 only)" OFF)
option(TORCHAO_ENABLE_ARM_NEON_DOT "Enable ARM Neon Dot Product extension" OFF)
option(TORCHAO_ENABLE_ARM_I8MM "Enable ARM 8-bit Integer Matrix Multiply instructions" OFF)
# Developer targets.
option(TORCHAO_BUILD_TESTS "Build tests" OFF)
option(TORCHAO_BUILD_BENCHMARKS "Build benchmarks" OFF)
| 27 | + |
# Default compiler flags, added at directory scope so they apply to every
# target defined after this point: position-independent code, all warnings,
# warnings treated as errors, and deprecation warnings silenced.
set(_torchao_default_flags "-fPIC" "-Wall" "-Werror" "-Wno-deprecated")

# Platform-specific warning relaxations are appended after the common flags.
if(APPLE)
  list(APPEND _torchao_default_flags "-Wno-shorten-64-to-32")
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  list(APPEND _torchao_default_flags
    "-Wno-error=unknown-pragmas"
    "-Wno-array-parameter"
    "-Wno-maybe-uninitialized"
    "-Wno-sign-compare"
  )
endif()

add_compile_options(${_torchao_default_flags})
unset(_torchao_default_flags)
| 40 | + |
| 41 | + |
| 42 | + |
# Fetch and build cpuinfo unless a parent project already provides the target.
if(NOT TARGET cpuinfo)
  # cpuinfo requires CMake 3.5, so configure it under 3.5 policies; the policy
  # stack is restored by the matching POP below.
  # (Setting CMAKE_POLICY_VERSION_MINIMUM to 3.5 is left disabled.)
  cmake_policy(PUSH)
  cmake_policy(VERSION 3.5)

  # The cpuinfo package has unused functions/variables, which would trip
  # -Werror. TODO (T215533422): fix upstream.
  # Note: these flags are added at directory scope, so they also apply to
  # targets defined after this block.
  add_compile_options(-Wno-unused-function -Wno-unused-variable)

  include(FetchContent)

  # Build only the library itself: no unit tests, mock tests, or benchmarks.
  foreach(_cpuinfo_component IN ITEMS UNIT_TESTS MOCK_TESTS BENCHMARKS)
    set(CPUINFO_BUILD_${_cpuinfo_component} OFF CACHE BOOL "" FORCE)
  endforeach()

  FetchContent_Declare(
    cpuinfo
    GIT_REPOSITORY https://github.com/pytorch/cpuinfo.git
    GIT_TAG c61fe919607bbc534d7a5a5707bdd7041e72c5ff
  )
  FetchContent_MakeAvailable(cpuinfo)

  cmake_policy(POP)
endif()
| 65 | + |
# GoogleTest is only needed when tests are enabled; it is downloaded as a zip
# archive of a fixed commit.
if(TORCHAO_BUILD_TESTS)
  include(FetchContent)
  set(_torchao_googletest_commit 03597a01ee50ed33e9dfd640b249b4be3799d395)
  FetchContent_Declare(googletest
    URL "https://github.com/google/googletest/archive/${_torchao_googletest_commit}.zip"
  )
  FetchContent_MakeAvailable(googletest)
  unset(_torchao_googletest_commit)
endif()
| 74 | + |
# Google Benchmark is only needed when benchmarks are enabled.
if(TORCHAO_BUILD_BENCHMARKS)
  include(FetchContent)

  # Do not build Google Benchmark's own tests.
  set(BENCHMARK_ENABLE_TESTING OFF)

  # Tracks the main branch (need main for benchmark::benchmark).
  # NOTE(review): this is a moving ref, so fetched sources can change between
  # configures.
  FetchContent_Declare(
    googlebenchmark
    GIT_REPOSITORY https://github.com/google/benchmark.git
    GIT_TAG main
  )
  FetchContent_MakeAvailable(googlebenchmark)
endif()
| 85 | + |
# Parallel backend: callers may predefine TORCHAO_PARALLEL_BACKEND; when it is
# not defined at all, fall back to aten_openmp.
if(NOT DEFINED TORCHAO_PARALLEL_BACKEND)
  set(TORCHAO_PARALLEL_BACKEND aten_openmp)
endif()

# Include root: when TORCHAO_INCLUDE_DIRS is unset or empty/false, use the
# directory three levels above this file.
if(NOT TORCHAO_INCLUDE_DIRS)
  set(TORCHAO_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/../../..")
endif()
| 93 | + |
# Helper modules. CMakePrintHelpers ships with CMake; Utils.cmake is
# project-local (presumably where target_link_torchao_parallel_backend(), used
# by the ops libraries below, is defined - confirm against that file).
include(CMakePrintHelpers)
include(${CMAKE_CURRENT_SOURCE_DIR}/shared_kernels/Utils.cmake)

# Report the include root and make it visible to every target defined in this
# directory and its subdirectories.
message(STATUS "TORCHAO_INCLUDE_DIRS: ${TORCHAO_INCLUDE_DIRS}")
include_directories(${TORCHAO_INCLUDE_DIRS})


# Build fallback kernels
add_subdirectory(torch_free_kernels/fallback)
| 105 | + |
# Build cpu/aarch64 kernels
# All definitions and flags in this block are added at directory scope, before
# the aarch64 kernels subdirectory (and KleidiAI, if fetched) is configured.
if(TORCHAO_BUILD_CPU_AARCH64)
  add_compile_definitions(TORCHAO_BUILD_CPU_AARCH64)
  message(STATUS "Building with cpu/aarch64")

  # Optional: NEON dot-product instructions.
  if(TORCHAO_ENABLE_ARM_NEON_DOT)
    message(STATUS "Building with ARM NEON dot product support")
    add_compile_definitions(TORCHAO_ENABLE_ARM_NEON_DOT)
    add_compile_options("-march=armv8.4-a+dotprod")
  endif()

  # Optional: 8-bit integer matrix-multiply instructions.
  if(TORCHAO_ENABLE_ARM_I8MM)
    message(STATUS "Building with ARM I8MM support")
    add_compile_definitions(TORCHAO_ENABLE_ARM_I8MM)
    add_compile_options("-march=armv8.6-a")
  endif()

  # Optional: Arm KleidiAI, an open-source library of optimized micro-kernels
  # for AI workloads on Arm CPUs.
  if(TORCHAO_BUILD_KLEIDIAI)
    message(STATUS "Building with Arm KleidiAI library")
    add_compile_definitions(TORCHAO_ENABLE_KLEIDI)
  endif()

  # Fetch KleidiAI only when requested and not already provided as a target.
  if(TORCHAO_BUILD_KLEIDIAI AND NOT TARGET kleidiai)
    include(FetchContent)
    # Build the library only, without its tests and benchmarks.
    foreach(_kleidiai_component IN ITEMS TESTS BENCHMARKS)
      set(KLEIDIAI_BUILD_${_kleidiai_component} OFF CACHE BOOL "" FORCE)
    endforeach()
    FetchContent_Declare(
      kleidiai
      GIT_REPOSITORY https://git.gitlab.arm.com/kleidi/kleidiai.git
      GIT_TAG v1.12.0
    )
    FetchContent_MakeAvailable(kleidiai)
  endif()

  # Defines torchao_kernels_aarch64
  add_subdirectory(torch_free_kernels/aarch64)
endif()
| 144 | + |
# Build ATen ops
# Produces the shared library torchao_ops_aten, linked against Torch, cpuinfo,
# the selected parallel backend and (optionally) the aarch64 kernels.
if(TORCHAO_BUILD_ATEN_OPS)
  find_package(Torch REQUIRED)

  # Collect the op sources as absolute paths under this directory.
  unset(_torchao_op_srcs_aten)
  foreach(_torchao_rel_src IN ITEMS
      shared_kernels/embedding_xbit/op_embedding_xbit_aten.cpp
      shared_kernels/linear_8bit_act_xbit_weight/linear_8bit_act_xbit_weight.cpp
      shared_kernels/linear_8bit_act_xbit_weight/op_linear_8bit_act_xbit_weight_aten.cpp
      shared_kernels/groupwise_lowbit_weight_lut/groupwise_lowbit_weight_lut.cpp
      shared_kernels/groupwise_lowbit_weight_lut/op_groupwise_lowbit_weight_lut_aten.cpp)
    list(APPEND _torchao_op_srcs_aten "${CMAKE_CURRENT_SOURCE_DIR}/${_torchao_rel_src}")
  endforeach()

  add_library(torchao_ops_aten SHARED ${_torchao_op_srcs_aten})
  target_compile_definitions(torchao_ops_aten PRIVATE TORCHAO_SHARED_KERNELS_BUILD_ATEN=1)
  target_include_directories(torchao_ops_aten PRIVATE "${TORCH_INCLUDE_DIRS}")

  # When a Python extension name is provided, emit the library under exactly
  # that file name: no "lib" prefix and a ".so" suffix.
  if(DEFINED TORCHAO_CMAKE_EXT_SO_NAME)
    message(STATUS "Setting output name to: ${TORCHAO_CMAKE_EXT_SO_NAME}.so")
    set_target_properties(torchao_ops_aten PROPERTIES
      OUTPUT_NAME ${TORCHAO_CMAKE_EXT_SO_NAME}
      PREFIX ""
      SUFFIX ".so"
    )
  endif()

  # Link order: parallel backend, optional aarch64 kernels (plus KleidiAI),
  # then cpuinfo and Torch.
  target_link_torchao_parallel_backend(torchao_ops_aten "${TORCHAO_PARALLEL_BACKEND}")
  if(TORCHAO_BUILD_CPU_AARCH64)
    target_link_libraries(torchao_ops_aten PRIVATE torchao_kernels_aarch64)
    if(TORCHAO_BUILD_KLEIDIAI)
      target_link_libraries(torchao_ops_aten PRIVATE kleidiai)
    endif()
  endif()
  target_link_libraries(torchao_ops_aten PRIVATE cpuinfo)
  target_link_libraries(torchao_ops_aten PRIVATE "${TORCH_LIBRARIES}")

  # Optional developer targets for the shared kernels.
  if(TORCHAO_BUILD_TESTS)
    add_subdirectory(shared_kernels/tests)
  endif()
  if(TORCHAO_BUILD_BENCHMARKS)
    add_subdirectory(shared_kernels/benchmarks)
  endif()

  # Install ATen targets
  install(TARGETS torchao_ops_aten EXPORT _targets DESTINATION lib)
endif()
| 196 | + |
| 197 | + |
# Build ExecuTorch ops
# Produces the static library torchao_ops_executorch.
if(TORCHAO_BUILD_EXECUTORCH_OPS)
  # ExecuTorch package is not required, but EXECUTORCH_INCLUDE_DIRS and EXECUTORCH_LIBRARIES must
  # be defined and EXECUTORCH_LIBRARIES must include the following libraries installed by ExecuTorch:
  # libexecutorch.a
  # libextension_threadpool.a
  # libcpuinfo.a
  # libpthreadpool.a
  #
  # Both variables are required, so fall back to locating an ExecuTorch
  # package as soon as either one is missing (not only when both are).
  if(NOT DEFINED EXECUTORCH_INCLUDE_DIRS OR NOT DEFINED EXECUTORCH_LIBRARIES)
    message(WARNING "EXECUTORCH_INCLUDE_DIRS and/or EXECUTORCH_LIBRARIES are not defined. Looking for ExecuTorch.")
    find_package(ExecuTorch HINTS ${CMAKE_PREFIX_PATH}/executorch/share/cmake)
  endif()

  # Op sources, turned into absolute paths under this directory.
  set(_torchao_op_srcs_executorch)
  list(APPEND _torchao_op_srcs_executorch
    shared_kernels/embedding_xbit/op_embedding_xbit_executorch.cpp
    shared_kernels/linear_8bit_act_xbit_weight/linear_8bit_act_xbit_weight.cpp
    shared_kernels/linear_8bit_act_xbit_weight/op_linear_8bit_act_xbit_weight_executorch.cpp
    shared_kernels/groupwise_lowbit_weight_lut/groupwise_lowbit_weight_lut.cpp
    shared_kernels/groupwise_lowbit_weight_lut/op_groupwise_lowbit_weight_lut_executorch.cpp
  )
  list(TRANSFORM _torchao_op_srcs_executorch PREPEND "${CMAKE_CURRENT_SOURCE_DIR}/")

  add_library(torchao_ops_executorch STATIC ${_torchao_op_srcs_executorch})
  target_compile_definitions(torchao_ops_executorch PRIVATE TORCHAO_SHARED_KERNELS_BUILD_EXECUTORCH=1)

  # This links to ExecuTorch
  target_link_torchao_parallel_backend(torchao_ops_executorch executorch)

  # Optional aarch64 kernels (plus KleidiAI), then cpuinfo.
  if(TORCHAO_BUILD_CPU_AARCH64)
    target_link_libraries(torchao_ops_executorch PRIVATE torchao_kernels_aarch64)
    if(TORCHAO_BUILD_KLEIDIAI)
      target_link_libraries(torchao_ops_executorch PRIVATE kleidiai)
    endif()
  endif()
  target_link_libraries(torchao_ops_executorch PRIVATE cpuinfo)
endif()
0 commit comments