diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ebf5cd4dc39..7a7b194ff84b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,6 +83,10 @@ endif () # Enable the SPIR-V target if requested (must declare before processing dependencies) option(TARGET_SPIRV "Include SPIR-V target" OFF) +option(TARGET_VULKAN "Include Vulkan target" ON) +if (TARGET_VULKAN) + set(TARGET_SPIRV ON) # required +endif() ## # Import dependencies diff --git a/LICENSE.txt b/LICENSE.txt index 13146db88f3b..9faf448676b1 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -195,6 +195,23 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + +---- + +src/mini_vulkan.h is Copyright (c) 2014-2017 The Khronos Group Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + ---- apps/linear_algebra/include/cblas.h is licensed under the BLAS license. diff --git a/Makefile b/Makefile index 3f4bc9c66b45..b00cf1402f26 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,12 @@ # For correctness and performance tests this include halide build time and run time. For # the tests in test/generator/ this times only the halide build time. +# Halide project version +HALIDE_VERSION_MAJOR ?= 15 +HALIDE_VERSION_MINOR ?= 0 +HALIDE_VERSION_PATCH ?= 0 +HALIDE_VERSION=$(HALIDE_VERSION_MAJOR).$(HALIDE_VERSION_MINOR).$(HALIDE_VERSION_PATCH) + # Disable built-in makefile rules for all apps to avoid pointless file-system # scanning and general weirdness resulting from implicit rules. 
MAKEFLAGS += --no-builtin-rules @@ -124,6 +130,8 @@ WITH_OPENCL ?= not-empty WITH_METAL ?= not-empty WITH_OPENGLCOMPUTE ?= not-empty WITH_D3D12 ?= not-empty +WITH_VULKAN ?= not-empty +WITH_SPIRV ?= not-empty WITH_WEBGPU ?= not-empty WITH_INTROSPECTION ?= not-empty WITH_EXCEPTIONS ?= @@ -134,6 +142,12 @@ WITH_LLVM_INSIDE_SHARED_LIBHALIDE ?= not-empty HL_TARGET ?= host HL_JIT_TARGET ?= host +HL_VERSION_FLAGS = \ + -DHALIDE_VERSION="$(HALIDE_VERSION)" \ + -DHALIDE_VERSION_MAJOR=$(HALIDE_VERSION_MAJOR) \ + -DHALIDE_VERSION_MINOR=$(HALIDE_VERSION_MINOR) \ + -DHALIDE_VERSION_PATCH=$(HALIDE_VERSION_PATCH) + X86_CXX_FLAGS=$(if $(WITH_X86), -DWITH_X86, ) X86_LLVM_CONFIG_LIB=$(if $(WITH_X86), x86, ) @@ -176,6 +190,12 @@ EXCEPTIONS_CXX_FLAGS=$(if $(WITH_EXCEPTIONS), -DHALIDE_WITH_EXCEPTIONS -fexcepti HEXAGON_CXX_FLAGS=$(if $(WITH_HEXAGON), -DWITH_HEXAGON, ) HEXAGON_LLVM_CONFIG_LIB=$(if $(WITH_HEXAGON), hexagon, ) +SPIRV_CXX_FLAGS=$(if $(WITH_SPIRV), -DWITH_SPIRV -isystem $(ROOT_DIR)/dependencies/spirv/include, ) +SPIRV_LLVM_CONFIG_LIB=$(if $(WITH_SPIRV), , ) + +VULKAN_CXX_FLAGS=$(if $(WITH_VULKAN), -DWITH_VULKAN, ) +VULKAN_LLVM_CONFIG_LIB=$(if $(WITH_VULKAN), , ) + WEBASSEMBLY_CXX_FLAGS=$(if $(WITH_WEBASSEMBLY), -DWITH_WEBASSEMBLY, ) WEBASSEMBLY_LLVM_CONFIG_LIB=$(if $(WITH_WEBASSEMBLY), webassembly, ) @@ -198,7 +218,7 @@ LLVM_CXX_FLAGS_LIBCPP := $(findstring -stdlib=libc++, $(LLVM_CXX_FLAGS)) endif CXX_FLAGS = $(CXXFLAGS) $(CXX_WARNING_FLAGS) $(RTTI_CXX_FLAGS) -Woverloaded-virtual $(FPIC) $(OPTIMIZE) -fno-omit-frame-pointer -DCOMPILING_HALIDE - +CXX_FLAGS += $(HL_VERSION_FLAGS) CXX_FLAGS += $(LLVM_CXX_FLAGS) CXX_FLAGS += $(PTX_CXX_FLAGS) CXX_FLAGS += $(ARM_CXX_FLAGS) @@ -215,6 +235,8 @@ CXX_FLAGS += $(INTROSPECTION_CXX_FLAGS) CXX_FLAGS += $(EXCEPTIONS_CXX_FLAGS) CXX_FLAGS += $(AMDGPU_CXX_FLAGS) CXX_FLAGS += $(RISCV_CXX_FLAGS) +CXX_FLAGS += $(SPIRV_CXX_FLAGS) +CXX_FLAGS += $(VULKAN_CXX_FLAGS) CXX_FLAGS += $(WEBASSEMBLY_CXX_FLAGS) # This is required on some hosts like powerpc64le-linux-gnu because we may build @@ -241,6 +263,8 @@ LLVM_STATIC_LIBFILES = \ $(POWERPC_LLVM_CONFIG_LIB) \ $(HEXAGON_LLVM_CONFIG_LIB) \ $(AMDGPU_LLVM_CONFIG_LIB) \ + $(SPIRV_LLVM_CONFIG_LIB) \ + $(VULKAN_LLVM_CONFIG_LIB) \ $(WEBASSEMBLY_LLVM_CONFIG_LIB) \ $(RISCV_LLVM_CONFIG_LIB) @@ -265,6 +289,7 @@ TEST_LD_FLAGS = -L$(BIN_DIR) -lHalide $(COMMON_LD_FLAGS) # In the tests, some of our expectations change depending on the llvm version TEST_CXX_FLAGS += -DLLVM_VERSION=$(LLVM_VERSION_TIMES_10) +TEST_CXX_FLAGS += $(HL_VERSION_FLAGS) # In the tests, default to exporting no symbols that aren't explicitly exported TEST_CXX_FLAGS += -fvisibility=hidden -fvisibility-inlines-hidden @@ -305,6 +330,12 @@ TEST_METAL = 1 endif endif +ifneq ($(WITH_VULKAN), ) +ifneq (,$(findstring vulkan,$(HL_TARGET))) +TEST_VULKAN = 1 +endif +endif + ifeq ($(UNAME), Linux) ifneq ($(TEST_CUDA), ) CUDA_LD_FLAGS ?= -L/usr/lib/nvidia-current -lcuda @@ -312,6 +343,9 @@ endif ifneq ($(TEST_OPENCL), ) OPENCL_LD_FLAGS ?= -lOpenCL endif +ifneq ($(TEST_VULKAN), ) +VULKAN_LD_FLAGS ?= -lvulkan +endif OPENGL_LD_FLAGS ?= -lGL HOST_OS=linux endif @@ -324,6 +358,10 @@ endif ifneq ($(TEST_OPENCL), ) OPENCL_LD_FLAGS ?= -framework OpenCL endif +ifneq ($(TEST_VULKAN), ) +# The Vulkan loader is distributed as a dylib on OSX (not a framework) +VULKAN_LD_FLAGS ?= -lvulkan +endif ifneq ($(TEST_METAL), ) METAL_LD_FLAGS ?= -framework Metal -framework Foundation endif @@ -335,6 +373,10 @@ ifneq ($(TEST_OPENCL), ) TEST_CXX_FLAGS += -DTEST_OPENCL endif +ifneq ($(TEST_VULKAN), ) 
+TEST_CXX_FLAGS += -DTEST_VULKAN
+endif
+
 ifneq ($(TEST_METAL), )
 # Using Metal APIs requires writing Objective-C++ (or Swift). Add ObjC++
 # to allow tests to create and destroy Metal contexts, etc. This requires
@@ -433,6 +475,7 @@ SOURCE_FILES = \
 	CodeGen_LLVM.cpp \
 	CodeGen_Metal_Dev.cpp \
 	CodeGen_OpenCL_Dev.cpp \
+	CodeGen_Vulkan_Dev.cpp \
 	CodeGen_OpenGLCompute_Dev.cpp \
 	CodeGen_Posix.cpp \
 	CodeGen_PowerPC.cpp \
@@ -623,6 +666,7 @@ HEADER_FILES = \
 	CodeGen_LLVM.h \
 	CodeGen_Metal_Dev.h \
 	CodeGen_OpenCL_Dev.h \
+	CodeGen_Vulkan_Dev.h \
 	CodeGen_OpenGLCompute_Dev.h \
 	CodeGen_Posix.h \
 	CodeGen_PTX_Dev.h \
@@ -853,8 +897,10 @@ RUNTIME_CPP_COMPONENTS = \
 	windows_profiler \
 	windows_threads \
 	windows_threads_tsan \
+	windows_vulkan \
 	windows_yield \
 	write_debug_image \
+	vulkan \
 	x86_cpu_features \
 
 RUNTIME_LL_COMPONENTS = \
@@ -883,6 +929,7 @@ RUNTIME_EXPORTED_INCLUDES = $(INCLUDE_DIR)/HalideRuntime.h \
 	$(INCLUDE_DIR)/HalideRuntimeOpenGLCompute.h \
 	$(INCLUDE_DIR)/HalideRuntimeMetal.h \
 	$(INCLUDE_DIR)/HalideRuntimeQurt.h \
+	$(INCLUDE_DIR)/HalideRuntimeVulkan.h \
 	$(INCLUDE_DIR)/HalideRuntimeWebGPU.h \
 	$(INCLUDE_DIR)/HalideBuffer.h \
 	$(INCLUDE_DIR)/HalidePyTorchHelpers.h \
@@ -1049,6 +1096,7 @@ RUNTIME_CXX_FLAGS = \
 	-Wno-unused-function \
 	-Wvla \
 	-Wsign-compare
+RUNTIME_CXX_FLAGS += $(HL_VERSION_FLAGS)
 
 $(BUILD_DIR)/initmod.windows_%_x86_32.ll: $(SRC_DIR)/runtime/windows_%_x86.cpp $(BUILD_DIR)/clang_ok
 	@mkdir -p $(@D)
diff --git a/README.md b/README.md
index 1863b113da37..276cb3bec453 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ currently targets:
 - CPU architectures: X86, ARM, Hexagon, PowerPC, RISC-V
 - Operating systems: Linux, Windows, macOS, Android, iOS, Qualcomm QuRT
 - GPU Compute APIs: CUDA, OpenCL, OpenGL Compute Shaders, Apple Metal, Microsoft
-  Direct X 12
+  Direct X 12, Vulkan
 
 Rather than being a standalone programming language, Halide is embedded in C++.
 This means you write C++ code that builds an in-memory representation of a
diff --git a/README_vulkan.md b/README_vulkan.md
new file mode 100644
index 000000000000..017dd56aed73
--- /dev/null
+++ b/README_vulkan.md
@@ -0,0 +1,267 @@
+# Vulkan Support for Halide
+
+Halide supports the Khronos Vulkan framework as a compute API backend for GPU-like
+devices, and compiles directly to a binary SPIR-V representation as part of its
+code generation before submitting it to the Vulkan API. Both JIT and AOT usage
+are supported via the `vulkan` target flag (e.g. `HL_JIT_TARGET=host-vulkan`).
+
+Vulkan support is actively under development, and considered *BETA* quality
+at this stage. Tests are passing, but performance tuning and user testing are needed
+to identify potential issues before rolling this into production.
+
+See [below](#current-status) for details.
+
+# Compiling Halide w/Vulkan Support
+
+You'll need to configure Halide with the CMake option TARGET_VULKAN enabled (note that it is now ON by default).
+
+For example, on Linux & OSX:
+
+```
+% cmake -G Ninja -DTARGET_VULKAN=ON -DCMAKE_BUILD_TYPE=Release -DLLVM_DIR=$LLVM_ROOT/lib/cmake/llvm -S . -B build
+% cmake --build build --config Release
+```
+
+On Windows, you may need to specify the location of the Vulkan SDK if the paths aren't resolved by CMake automatically. For example (assuming the Vulkan SDK is installed in the default path):
+
+```
+C:\> cmake -G Ninja -DTARGET_VULKAN=ON -DCMAKE_BUILD_TYPE=Release -DLLVM_DIR=$LLVM_ROOT/lib/cmake/llvm -DVulkan_LIBRARY=C:\VulkanSDK\1.3.231.1\Lib\vulkan-1.lib -DVulkan_INCLUDE_DIR=C:\VulkanSDK\1.3.231.1\Include\vulkan -S . -B build
+C:\> cmake --build build --config Release
+```
+
+# Vulkan Runtime Environment:
+
+Halide has no direct dependency on Vulkan for code-generation, but the runtime
+requires a working Vulkan environment to run Halide-generated code. Any valid
+Vulkan v1.0+ device driver should work.
+
+Specifically, you'll need:
+
+- A vendor-specific Vulkan device driver
+- The generic Vulkan loader library
+
+For AMD, NVIDIA and Intel devices, download and install the latest graphics driver
+for your platform. Vulkan support should be included.
+
+## Windows
+
+To build Halide AOT generators, you'll need the Vulkan SDK (specifically the Vulkan loader library and headers):
+https://sdk.lunarg.com/sdk/download/latest/windows/vulkan-sdk.exe
+
+For Vulkan device drivers, consult the appropriate hardware vendor for your device. A few common ones are listed below.
+
+- [AMD Vulkan Driver](https://www.amd.com/en/technologies/vulkan)
+- [NVIDIA Vulkan Driver](https://developer.nvidia.com/vulkan-driver)
+- [INTEL Vulkan Driver](https://www.intel.com/content/www/us/en/download-center/home.html)
+
+## Linux
+
+On Ubuntu Linux v22.04, the Vulkan runtime is distributed in the `vulkan-tools` package. For earlier versions of Ubuntu (e.g. v20.x or v18.x) the contents of the `vulkan-tools` package were distributed as `vulkan-utils`, so use that package instead.
+
+Proprietary drivers can be installed via `apt` using PPAs for each vendor. Examples for AMD and NVIDIA are provided below.
+
+For AMD on Ubuntu v22.04:
+```
+$ sudo add-apt-repository ppa:oibaf/graphics-drivers
+$ sudo apt update
+$ sudo apt upgrade
+$ sudo apt install libvulkan1 mesa-vulkan-drivers vulkan-tools
+```
+
+For NVIDIA on Ubuntu v22.04:
+```
+$ sudo add-apt-repository ppa:graphics-drivers/ppa
+$ sudo apt update
+$ sudo apt upgrade
+# - replace ### with latest driver release (e.g. 515)
+$ sudo apt install nvidia-driver-### nvidia-settings vulkan vulkan-tools
+```
+
+Note that only valid drivers for your system should be installed, since there are
+reports of the Vulkan loader segfaulting just by having a non-supported driver present.
+Specifically, the seemingly generic `mesa-vulkan-drivers` package actually includes the AMD
+graphics driver, which can cause problems if installed on an NVIDIA-only system.
+
+## Mac
+
+You're better off using Halide's Metal backend instead, but it is possible to run
+Vulkan apps on a Mac via the MoltenVK library:
+
+- [MoltenVK Project](https://github.com/KhronosGroup/MoltenVK)
+
+The easiest way to get the necessary dependencies is to use the official MoltenVK SDK
+installer provided by LunarG:
+
+- [MoltenVK SDK (Latest Release)](https://sdk.lunarg.com/sdk/download/latest/mac/vulkan-sdk.dmg)
+
+Alternatively, if you have the [Homebrew](https://brew.sh/) package manager installed
+for macOS, you can use it to install the Vulkan Loader and MoltenVK compatibility
+layer:
+
+```
+$ brew install vulkan-loader molten-vk
+```
+
+# Testing Your Vulkan Environment
+
+You can validate that everything is configured correctly by running the `vulkaninfo`
+app (bundled in the `vulkan-tools` package, or `vulkan-utils` on older Ubuntu releases)
+to make sure your device is detected (e.g.):
+
+```
+$ vulkaninfo
+==========
+VULKANINFO
+==========
+
+Vulkan Instance Version: 1.3.224
+
+Instance Extensions: count = 19
+===============================
+    ...
+
+Layers: count = 10
+==================
+VK_LAYER_KHRONOS_profiles (Khronos Profiles layer) Vulkan version 1.3.224, layer version 1:
+	Layer Extensions: count = 0
+	Devices: count = 1
+		GPU id = 0 (NVIDIA GeForce RTX 3070 Ti)
+		Layer-Device Extensions: count = 1
+
+...
+
+```
+
+Make sure everything looks correct before continuing!
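+
+If you'd rather verify the environment programmatically, the sketch below (plain
+Vulkan C API, independent of Halide; link with `-lvulkan`) checks that the loader
+and at least one device driver are usable:
+
+```cpp
+#include <vulkan/vulkan.h>
+#include <cstdio>
+
+int main() {
+    // Create a minimal instance; failure here usually means the loader or
+    // a vendor driver (ICD) is missing or misconfigured.
+    VkApplicationInfo app = {VK_STRUCTURE_TYPE_APPLICATION_INFO};
+    app.apiVersion = VK_API_VERSION_1_0;
+    VkInstanceCreateInfo create_info = {VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO};
+    create_info.pApplicationInfo = &app;
+
+    VkInstance instance = VK_NULL_HANDLE;
+    if (vkCreateInstance(&create_info, nullptr, &instance) != VK_SUCCESS) {
+        fprintf(stderr, "vkCreateInstance failed\n");
+        return 1;
+    }
+
+    // Count the physical devices visible to the loader.
+    uint32_t count = 0;
+    vkEnumeratePhysicalDevices(instance, &count, nullptr);
+    printf("Vulkan physical devices found: %u\n", count);
+
+    vkDestroyInstance(instance, nullptr);
+    return (count > 0) ? 0 : 1;
+}
+```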
+
+# Targeting Vulkan
+
+To generate Halide code for Vulkan, simply add the `vulkan` flag to your target, as well as any other optional device-specific features you wish to enable for Halide:
+
+| Target Feature | Description |
+| -- | -- |
+| `vulkan` | Enables the Vulkan backend |
+| `vk_int8` | Allows 8-bit integer storage types to be used |
+| `vk_int16` | Allows 16-bit integer storage types to be used |
+| `vk_int64` | Allows 64-bit integer storage types to be used |
+| `vk_float16` | Allows 16-bit floating-point values to be used for computation |
+| `vk_float64` | Allows 64-bit floating-point values to be used for computation |
+| `vk_v10` | Generates code compatible with the Vulkan v1.0+ API |
+| `vk_v12` | Generates code compatible with the Vulkan v1.2+ API |
+| `vk_v13` | Generates code compatible with the Vulkan v1.3+ API |
+
+Note that 32-bit integer and floating-point types are always available. All other optional device features are off by default (since they are not required by the Vulkan API, and thus must be explicitly enabled to ensure that the code being generated will be compatible with the device and API version being used for execution).
+
+For AOT generators, add `vulkan` (and any other features you wish to use) to the `target` command-line option:
+
+```
+$ ./lesson_15_generate -g my_first_generator -o . target=host-vulkan-vk_int8-vk_int16
+```
+
+For JIT apps, use the `HL_JIT_TARGET` environment variable:
+
+```
+$ HL_JIT_TARGET=host-vulkan-vk_int8-vk_int16 ./tutorial/lesson_01_basics
+```
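+
+When JIT-compiling from C++, the target can also be constructed programmatically
+instead of via the environment. The snippet below is a minimal illustrative sketch
+(the pipeline and variable names are made up for this example): it schedules a
+trivial Func on the GPU and requests the Vulkan feature explicitly, which has the
+same effect as setting `HL_JIT_TARGET=host-vulkan`:
+
+```cpp
+#include "Halide.h"
+#include <cstdio>
+using namespace Halide;
+
+int main() {
+    Func f("f");
+    Var x("x"), y("y"), xo("xo"), yo("yo"), xi("xi"), yi("yi");
+    f(x, y) = x + y;
+
+    // Map the loop nest onto GPU blocks and 8x8 thread tiles.
+    f.gpu_tile(x, y, xo, yo, xi, yi, 8, 8);
+
+    // Equivalent to parsing the string "host-vulkan"; optional features can
+    // be added the same way, e.g. Target("host-vulkan-vk_int8-vk_int16").
+    Target t = get_host_target().with_feature(Target::Vulkan);
+
+    Buffer<int32_t> out = f.realize({256, 256}, t);
+    printf("out(1, 2) = %d\n", out(1, 2));
+    return 0;
+}
+```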
+
+# Useful Runtime Environment Variables
+
+To modify the default behavior of the runtime, the following environment
+variables can be used to adjust the configuration of the Vulkan backend
+at execution time:
+
+`HL_VK_LAYERS=...` will tell Halide to choose a suitable Vulkan instance
+that supports the given list of layers. If not set, `VK_INSTANCE_LAYERS=...`
+will be used instead. If neither is present, Halide will use the first
+Vulkan compute device it can find. Multiple layers can be specified using
+the appropriate environment variable list delimiter (`:` on Linux/OSX/Posix,
+or `;` on Windows).
+
+`HL_VK_DEVICE_TYPE=...` will tell Halide which type of device to select
+when creating the Vulkan instance. Valid options are 'gpu', 'discrete-gpu',
+'integrated-gpu', 'virtual-gpu', or 'cpu'. If not set, Halide will search
+for the first 'gpu'-like device it can find, or fall back to the first
+compute device it can find.
+
+`HL_VK_ALLOC_CONFIG=...` will tell Halide to configure the Vulkan memory
+allocator to use the given constraints, specified as five integer values
+separated by the appropriate environment variable list delimiter
+(e.g. `N:N:N:N:N` on Linux/OSX/Posix, or `N;N;N;N;N` on Windows). These values
+correspond to `maximum_pool_size`, `minimum_block_size`, `maximum_block_size`,
+`maximum_block_count` and `nearest_multiple`.
+
+The `maximum_pool_size` constraint will tell Halide to configure the
+Vulkan memory allocator to never request more than N megabytes for the
+entire pool of allocations for the context. This includes all resource
+blocks used for suballocations. Setting this to a non-zero value will
+limit the amount of device memory used by Halide, which may be useful when
+other applications and frameworks are competing for resources.
+Default is 0, meaning no limit.
+
+The `minimum_block_size` constraint will tell Halide to configure the
+Vulkan memory allocator to always request a minimum of N megabytes for
+a resource block, which will be used as a pool for suballocations.
+Increasing this value may improve performance while sacrificing the amount
+of available device memory. Default is 32MB.
+
+The `maximum_block_size` constraint will tell Halide to configure the
+Vulkan memory allocator to never exceed a maximum of N megabytes for a
+resource block. Decreasing this value may free up more memory, but may
+impact performance and/or restrict allocations to be unusably small.
+Default is 0, meaning no limit.
+
+The `maximum_block_count` constraint will tell Halide to configure the
+Vulkan memory allocator to never exceed a total of N block allocations.
+Decreasing this value may free up more memory, but may impact performance
+and/or restrict allocations. Default is 0, meaning no limit.
+
+The `nearest_multiple` constraint will tell Halide to configure the
+Vulkan memory allocator to always round up the requested allocation sizes
+to a multiple of the given integer value. This is useful for architectures
+that require specific alignments for subregions allocated within a block.
+Default is 32; setting this to zero means no constraint.
+
+# Debug Environment Variables
+
+The following environment variables may be useful for tracking down potential
+issues related to Vulkan:
+
+`HL_DEBUG_CODEGEN=3` will print out debug info that includes output from the
+SPIR-V code generator used for Vulkan while it is compiling.
+
+`HL_SPIRV_DUMP_FILE=...` specifies a file to dump the binary SPIR-V generated
+during compilation. Useful for debugging CodeGen issues. The output can be
+inspected, validated and disassembled via the SPIR-V tools:
+
+https://github.com/KhronosGroup/SPIRV-Tools
+
+# Current Status
+
+All correctness tests are now passing on tested configs for Linux & Windows using the target `host-vulkan-vk_int8-vk_int16-vk_int64-vk_float16-vk_float64-vk_v13` on LLVM v14.x.
+
+MacOS passes most tests, but encounters internal MoltenVK code translation issues for wide vectors and ambiguous function calls.
+
+Python apps, tutorials and correctness tests are now passing, but the AOT cases are skipped since the runtime environment needs to be customized to locate the platform-specific Vulkan loader library.
+
+Android platform support is currently being worked on.
+
+# Caveats:
+
+- Other than 32-bit floats and integers, every data type is optional per the Vulkan spec
+- 64-bit float types can be enabled, but there aren't any native math functions for them in SPIR-V
+- Only one dynamically sized shared memory allocation can be used, but any number of
+  fixed-size allocations are supported (up to the maximum amount allowed by the device)
+
+# Known TODO:
+
+- Performance tuning of CodeGen and Runtime
+- More platform support (Android is a work in progress; RISC-V, etc.)
+- Adapt unsupported types to supported types (if vk_int8 is missing, promote to uint32_t)?
+- Better debugging utilities using the Vulkan debug hooks.
+- Allow debug symbols to be stripped from SPIR-V during codegen to reduce
+  memory overhead for large kernels.
+- Investigate floating point rounding and precision (v1.3 adds more controls) +- Investigate memory model usage (can Halide gain anything from these?) + diff --git a/cmake/HalideGeneratorHelpers.cmake b/cmake/HalideGeneratorHelpers.cmake index 373c6d212028..461687982ea1 100644 --- a/cmake/HalideGeneratorHelpers.cmake +++ b/cmake/HalideGeneratorHelpers.cmake @@ -711,6 +711,11 @@ function(_Halide_target_link_gpu_libs TARGET VISIBILITY) endif () endif () + if ("${ARGN}" MATCHES "vulkan") + find_package(Vulkan REQUIRED) + target_link_libraries(${TARGET} ${VISIBILITY} Vulkan::Vulkan) + endif () + if ("${ARGN}" MATCHES "metal") find_library(FOUNDATION_LIBRARY Foundation REQUIRED) find_library(METAL_LIBRARY Metal REQUIRED) diff --git a/dependencies/spirv/include/spirv/1.0/spirv.h b/dependencies/spirv/include/spirv/1.0/spirv.h deleted file mode 100644 index bd5a9b9593aa..000000000000 --- a/dependencies/spirv/include/spirv/1.0/spirv.h +++ /dev/null @@ -1,993 +0,0 @@ -/* -** Copyright (c) 2014-2018 The Khronos Group Inc. -** -** Permission is hereby granted, free of charge, to any person obtaining a copy -** of this software and/or associated documentation files (the "Materials"), -** to deal in the Materials without restriction, including without limitation -** the rights to use, copy, modify, merge, publish, distribute, sublicense, -** and/or sell copies of the Materials, and to permit persons to whom the -** Materials are furnished to do so, subject to the following conditions: -** -** The above copyright notice and this permission notice shall be included in -** all copies or substantial portions of the Materials. -** -** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -** -** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -** IN THE MATERIALS. -*/ - -/* -** This header is automatically generated by the same tool that creates -** the Binary Section of the SPIR-V specification. -*/ - -/* -** Enumeration tokens for SPIR-V, in various styles: -** C, C++, C++11, JSON, Lua, Python -** -** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL -** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL -** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL -** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL -** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] -** -** Some tokens act like mask values, which can be OR'd together, -** while others are mutually exclusive. The mask-like ones have -** "Mask" in their name, and a parallel enum that has the shift -** amount (1 << x) for each corresponding enumerant. 
-*/ - -#ifndef spirv_H -#define spirv_H - -typedef unsigned int SpvId; - -#define SPV_VERSION 0x10000 -#define SPV_REVISION 12 - -static const unsigned int SpvMagicNumber = 0x07230203; -static const unsigned int SpvVersion = 0x00010000; -static const unsigned int SpvRevision = 12; -static const unsigned int SpvOpCodeMask = 0xffff; -static const unsigned int SpvWordCountShift = 16; - -typedef enum SpvSourceLanguage_ { - SpvSourceLanguageUnknown = 0, - SpvSourceLanguageESSL = 1, - SpvSourceLanguageGLSL = 2, - SpvSourceLanguageOpenCL_C = 3, - SpvSourceLanguageOpenCL_CPP = 4, - SpvSourceLanguageHLSL = 5, - SpvSourceLanguageMax = 0x7fffffff, -} SpvSourceLanguage; - -typedef enum SpvExecutionModel_ { - SpvExecutionModelVertex = 0, - SpvExecutionModelTessellationControl = 1, - SpvExecutionModelTessellationEvaluation = 2, - SpvExecutionModelGeometry = 3, - SpvExecutionModelFragment = 4, - SpvExecutionModelGLCompute = 5, - SpvExecutionModelKernel = 6, - SpvExecutionModelMax = 0x7fffffff, -} SpvExecutionModel; - -typedef enum SpvAddressingModel_ { - SpvAddressingModelLogical = 0, - SpvAddressingModelPhysical32 = 1, - SpvAddressingModelPhysical64 = 2, - SpvAddressingModelMax = 0x7fffffff, -} SpvAddressingModel; - -typedef enum SpvMemoryModel_ { - SpvMemoryModelSimple = 0, - SpvMemoryModelGLSL450 = 1, - SpvMemoryModelOpenCL = 2, - SpvMemoryModelMax = 0x7fffffff, -} SpvMemoryModel; - -typedef enum SpvExecutionMode_ { - SpvExecutionModeInvocations = 0, - SpvExecutionModeSpacingEqual = 1, - SpvExecutionModeSpacingFractionalEven = 2, - SpvExecutionModeSpacingFractionalOdd = 3, - SpvExecutionModeVertexOrderCw = 4, - SpvExecutionModeVertexOrderCcw = 5, - SpvExecutionModePixelCenterInteger = 6, - SpvExecutionModeOriginUpperLeft = 7, - SpvExecutionModeOriginLowerLeft = 8, - SpvExecutionModeEarlyFragmentTests = 9, - SpvExecutionModePointMode = 10, - SpvExecutionModeXfb = 11, - SpvExecutionModeDepthReplacing = 12, - SpvExecutionModeDepthGreater = 14, - SpvExecutionModeDepthLess = 15, - SpvExecutionModeDepthUnchanged = 16, - SpvExecutionModeLocalSize = 17, - SpvExecutionModeLocalSizeHint = 18, - SpvExecutionModeInputPoints = 19, - SpvExecutionModeInputLines = 20, - SpvExecutionModeInputLinesAdjacency = 21, - SpvExecutionModeTriangles = 22, - SpvExecutionModeInputTrianglesAdjacency = 23, - SpvExecutionModeQuads = 24, - SpvExecutionModeIsolines = 25, - SpvExecutionModeOutputVertices = 26, - SpvExecutionModeOutputPoints = 27, - SpvExecutionModeOutputLineStrip = 28, - SpvExecutionModeOutputTriangleStrip = 29, - SpvExecutionModeVecTypeHint = 30, - SpvExecutionModeContractionOff = 31, - SpvExecutionModePostDepthCoverage = 4446, - SpvExecutionModeStencilRefReplacingEXT = 5027, - SpvExecutionModeMax = 0x7fffffff, -} SpvExecutionMode; - -typedef enum SpvStorageClass_ { - SpvStorageClassUniformConstant = 0, - SpvStorageClassInput = 1, - SpvStorageClassUniform = 2, - SpvStorageClassOutput = 3, - SpvStorageClassWorkgroup = 4, - SpvStorageClassCrossWorkgroup = 5, - SpvStorageClassPrivate = 6, - SpvStorageClassFunction = 7, - SpvStorageClassGeneric = 8, - SpvStorageClassPushConstant = 9, - SpvStorageClassAtomicCounter = 10, - SpvStorageClassImage = 11, - SpvStorageClassStorageBuffer = 12, - SpvStorageClassMax = 0x7fffffff, -} SpvStorageClass; - -typedef enum SpvDim_ { - SpvDim1D = 0, - SpvDim2D = 1, - SpvDim3D = 2, - SpvDimCube = 3, - SpvDimRect = 4, - SpvDimBuffer = 5, - SpvDimSubpassData = 6, - SpvDimMax = 0x7fffffff, -} SpvDim; - -typedef enum SpvSamplerAddressingMode_ { - SpvSamplerAddressingModeNone = 0, - 
SpvSamplerAddressingModeClampToEdge = 1, - SpvSamplerAddressingModeClamp = 2, - SpvSamplerAddressingModeRepeat = 3, - SpvSamplerAddressingModeRepeatMirrored = 4, - SpvSamplerAddressingModeMax = 0x7fffffff, -} SpvSamplerAddressingMode; - -typedef enum SpvSamplerFilterMode_ { - SpvSamplerFilterModeNearest = 0, - SpvSamplerFilterModeLinear = 1, - SpvSamplerFilterModeMax = 0x7fffffff, -} SpvSamplerFilterMode; - -typedef enum SpvImageFormat_ { - SpvImageFormatUnknown = 0, - SpvImageFormatRgba32f = 1, - SpvImageFormatRgba16f = 2, - SpvImageFormatR32f = 3, - SpvImageFormatRgba8 = 4, - SpvImageFormatRgba8Snorm = 5, - SpvImageFormatRg32f = 6, - SpvImageFormatRg16f = 7, - SpvImageFormatR11fG11fB10f = 8, - SpvImageFormatR16f = 9, - SpvImageFormatRgba16 = 10, - SpvImageFormatRgb10A2 = 11, - SpvImageFormatRg16 = 12, - SpvImageFormatRg8 = 13, - SpvImageFormatR16 = 14, - SpvImageFormatR8 = 15, - SpvImageFormatRgba16Snorm = 16, - SpvImageFormatRg16Snorm = 17, - SpvImageFormatRg8Snorm = 18, - SpvImageFormatR16Snorm = 19, - SpvImageFormatR8Snorm = 20, - SpvImageFormatRgba32i = 21, - SpvImageFormatRgba16i = 22, - SpvImageFormatRgba8i = 23, - SpvImageFormatR32i = 24, - SpvImageFormatRg32i = 25, - SpvImageFormatRg16i = 26, - SpvImageFormatRg8i = 27, - SpvImageFormatR16i = 28, - SpvImageFormatR8i = 29, - SpvImageFormatRgba32ui = 30, - SpvImageFormatRgba16ui = 31, - SpvImageFormatRgba8ui = 32, - SpvImageFormatR32ui = 33, - SpvImageFormatRgb10a2ui = 34, - SpvImageFormatRg32ui = 35, - SpvImageFormatRg16ui = 36, - SpvImageFormatRg8ui = 37, - SpvImageFormatR16ui = 38, - SpvImageFormatR8ui = 39, - SpvImageFormatMax = 0x7fffffff, -} SpvImageFormat; - -typedef enum SpvImageChannelOrder_ { - SpvImageChannelOrderR = 0, - SpvImageChannelOrderA = 1, - SpvImageChannelOrderRG = 2, - SpvImageChannelOrderRA = 3, - SpvImageChannelOrderRGB = 4, - SpvImageChannelOrderRGBA = 5, - SpvImageChannelOrderBGRA = 6, - SpvImageChannelOrderARGB = 7, - SpvImageChannelOrderIntensity = 8, - SpvImageChannelOrderLuminance = 9, - SpvImageChannelOrderRx = 10, - SpvImageChannelOrderRGx = 11, - SpvImageChannelOrderRGBx = 12, - SpvImageChannelOrderDepth = 13, - SpvImageChannelOrderDepthStencil = 14, - SpvImageChannelOrdersRGB = 15, - SpvImageChannelOrdersRGBx = 16, - SpvImageChannelOrdersRGBA = 17, - SpvImageChannelOrdersBGRA = 18, - SpvImageChannelOrderABGR = 19, - SpvImageChannelOrderMax = 0x7fffffff, -} SpvImageChannelOrder; - -typedef enum SpvImageChannelDataType_ { - SpvImageChannelDataTypeSnormInt8 = 0, - SpvImageChannelDataTypeSnormInt16 = 1, - SpvImageChannelDataTypeUnormInt8 = 2, - SpvImageChannelDataTypeUnormInt16 = 3, - SpvImageChannelDataTypeUnormShort565 = 4, - SpvImageChannelDataTypeUnormShort555 = 5, - SpvImageChannelDataTypeUnormInt101010 = 6, - SpvImageChannelDataTypeSignedInt8 = 7, - SpvImageChannelDataTypeSignedInt16 = 8, - SpvImageChannelDataTypeSignedInt32 = 9, - SpvImageChannelDataTypeUnsignedInt8 = 10, - SpvImageChannelDataTypeUnsignedInt16 = 11, - SpvImageChannelDataTypeUnsignedInt32 = 12, - SpvImageChannelDataTypeHalfFloat = 13, - SpvImageChannelDataTypeFloat = 14, - SpvImageChannelDataTypeUnormInt24 = 15, - SpvImageChannelDataTypeUnormInt101010_2 = 16, - SpvImageChannelDataTypeMax = 0x7fffffff, -} SpvImageChannelDataType; - -typedef enum SpvImageOperandsShift_ { - SpvImageOperandsBiasShift = 0, - SpvImageOperandsLodShift = 1, - SpvImageOperandsGradShift = 2, - SpvImageOperandsConstOffsetShift = 3, - SpvImageOperandsOffsetShift = 4, - SpvImageOperandsConstOffsetsShift = 5, - SpvImageOperandsSampleShift = 6, - 
SpvImageOperandsMinLodShift = 7, - SpvImageOperandsMax = 0x7fffffff, -} SpvImageOperandsShift; - -typedef enum SpvImageOperandsMask_ { - SpvImageOperandsMaskNone = 0, - SpvImageOperandsBiasMask = 0x00000001, - SpvImageOperandsLodMask = 0x00000002, - SpvImageOperandsGradMask = 0x00000004, - SpvImageOperandsConstOffsetMask = 0x00000008, - SpvImageOperandsOffsetMask = 0x00000010, - SpvImageOperandsConstOffsetsMask = 0x00000020, - SpvImageOperandsSampleMask = 0x00000040, - SpvImageOperandsMinLodMask = 0x00000080, -} SpvImageOperandsMask; - -typedef enum SpvFPFastMathModeShift_ { - SpvFPFastMathModeNotNaNShift = 0, - SpvFPFastMathModeNotInfShift = 1, - SpvFPFastMathModeNSZShift = 2, - SpvFPFastMathModeAllowRecipShift = 3, - SpvFPFastMathModeFastShift = 4, - SpvFPFastMathModeMax = 0x7fffffff, -} SpvFPFastMathModeShift; - -typedef enum SpvFPFastMathModeMask_ { - SpvFPFastMathModeMaskNone = 0, - SpvFPFastMathModeNotNaNMask = 0x00000001, - SpvFPFastMathModeNotInfMask = 0x00000002, - SpvFPFastMathModeNSZMask = 0x00000004, - SpvFPFastMathModeAllowRecipMask = 0x00000008, - SpvFPFastMathModeFastMask = 0x00000010, -} SpvFPFastMathModeMask; - -typedef enum SpvFPRoundingMode_ { - SpvFPRoundingModeRTE = 0, - SpvFPRoundingModeRTZ = 1, - SpvFPRoundingModeRTP = 2, - SpvFPRoundingModeRTN = 3, - SpvFPRoundingModeMax = 0x7fffffff, -} SpvFPRoundingMode; - -typedef enum SpvLinkageType_ { - SpvLinkageTypeExport = 0, - SpvLinkageTypeImport = 1, - SpvLinkageTypeMax = 0x7fffffff, -} SpvLinkageType; - -typedef enum SpvAccessQualifier_ { - SpvAccessQualifierReadOnly = 0, - SpvAccessQualifierWriteOnly = 1, - SpvAccessQualifierReadWrite = 2, - SpvAccessQualifierMax = 0x7fffffff, -} SpvAccessQualifier; - -typedef enum SpvFunctionParameterAttribute_ { - SpvFunctionParameterAttributeZext = 0, - SpvFunctionParameterAttributeSext = 1, - SpvFunctionParameterAttributeByVal = 2, - SpvFunctionParameterAttributeSret = 3, - SpvFunctionParameterAttributeNoAlias = 4, - SpvFunctionParameterAttributeNoCapture = 5, - SpvFunctionParameterAttributeNoWrite = 6, - SpvFunctionParameterAttributeNoReadWrite = 7, - SpvFunctionParameterAttributeMax = 0x7fffffff, -} SpvFunctionParameterAttribute; - -typedef enum SpvDecoration_ { - SpvDecorationRelaxedPrecision = 0, - SpvDecorationSpecId = 1, - SpvDecorationBlock = 2, - SpvDecorationBufferBlock = 3, - SpvDecorationRowMajor = 4, - SpvDecorationColMajor = 5, - SpvDecorationArrayStride = 6, - SpvDecorationMatrixStride = 7, - SpvDecorationGLSLShared = 8, - SpvDecorationGLSLPacked = 9, - SpvDecorationCPacked = 10, - SpvDecorationBuiltIn = 11, - SpvDecorationNoPerspective = 13, - SpvDecorationFlat = 14, - SpvDecorationPatch = 15, - SpvDecorationCentroid = 16, - SpvDecorationSample = 17, - SpvDecorationInvariant = 18, - SpvDecorationRestrict = 19, - SpvDecorationAliased = 20, - SpvDecorationVolatile = 21, - SpvDecorationConstant = 22, - SpvDecorationCoherent = 23, - SpvDecorationNonWritable = 24, - SpvDecorationNonReadable = 25, - SpvDecorationUniform = 26, - SpvDecorationSaturatedConversion = 28, - SpvDecorationStream = 29, - SpvDecorationLocation = 30, - SpvDecorationComponent = 31, - SpvDecorationIndex = 32, - SpvDecorationBinding = 33, - SpvDecorationDescriptorSet = 34, - SpvDecorationOffset = 35, - SpvDecorationXfbBuffer = 36, - SpvDecorationXfbStride = 37, - SpvDecorationFuncParamAttr = 38, - SpvDecorationFPRoundingMode = 39, - SpvDecorationFPFastMathMode = 40, - SpvDecorationLinkageAttributes = 41, - SpvDecorationNoContraction = 42, - SpvDecorationInputAttachmentIndex = 43, - SpvDecorationAlignment 
= 44, - SpvDecorationExplicitInterpAMD = 4999, - SpvDecorationOverrideCoverageNV = 5248, - SpvDecorationPassthroughNV = 5250, - SpvDecorationViewportRelativeNV = 5252, - SpvDecorationSecondaryViewportRelativeNV = 5256, - SpvDecorationHlslCounterBufferGOOGLE = 5634, - SpvDecorationHlslSemanticGOOGLE = 5635, - SpvDecorationMax = 0x7fffffff, -} SpvDecoration; - -typedef enum SpvBuiltIn_ { - SpvBuiltInPosition = 0, - SpvBuiltInPointSize = 1, - SpvBuiltInClipDistance = 3, - SpvBuiltInCullDistance = 4, - SpvBuiltInVertexId = 5, - SpvBuiltInInstanceId = 6, - SpvBuiltInPrimitiveId = 7, - SpvBuiltInInvocationId = 8, - SpvBuiltInLayer = 9, - SpvBuiltInViewportIndex = 10, - SpvBuiltInTessLevelOuter = 11, - SpvBuiltInTessLevelInner = 12, - SpvBuiltInTessCoord = 13, - SpvBuiltInPatchVertices = 14, - SpvBuiltInFragCoord = 15, - SpvBuiltInPointCoord = 16, - SpvBuiltInFrontFacing = 17, - SpvBuiltInSampleId = 18, - SpvBuiltInSamplePosition = 19, - SpvBuiltInSampleMask = 20, - SpvBuiltInFragDepth = 22, - SpvBuiltInHelperInvocation = 23, - SpvBuiltInNumWorkgroups = 24, - SpvBuiltInWorkgroupSize = 25, - SpvBuiltInWorkgroupId = 26, - SpvBuiltInLocalInvocationId = 27, - SpvBuiltInGlobalInvocationId = 28, - SpvBuiltInLocalInvocationIndex = 29, - SpvBuiltInWorkDim = 30, - SpvBuiltInGlobalSize = 31, - SpvBuiltInEnqueuedWorkgroupSize = 32, - SpvBuiltInGlobalOffset = 33, - SpvBuiltInGlobalLinearId = 34, - SpvBuiltInSubgroupSize = 36, - SpvBuiltInSubgroupMaxSize = 37, - SpvBuiltInNumSubgroups = 38, - SpvBuiltInNumEnqueuedSubgroups = 39, - SpvBuiltInSubgroupId = 40, - SpvBuiltInSubgroupLocalInvocationId = 41, - SpvBuiltInVertexIndex = 42, - SpvBuiltInInstanceIndex = 43, - SpvBuiltInSubgroupEqMaskKHR = 4416, - SpvBuiltInSubgroupGeMaskKHR = 4417, - SpvBuiltInSubgroupGtMaskKHR = 4418, - SpvBuiltInSubgroupLeMaskKHR = 4419, - SpvBuiltInSubgroupLtMaskKHR = 4420, - SpvBuiltInBaseVertex = 4424, - SpvBuiltInBaseInstance = 4425, - SpvBuiltInDrawIndex = 4426, - SpvBuiltInDeviceIndex = 4438, - SpvBuiltInViewIndex = 4440, - SpvBuiltInBaryCoordNoPerspAMD = 4992, - SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993, - SpvBuiltInBaryCoordNoPerspSampleAMD = 4994, - SpvBuiltInBaryCoordSmoothAMD = 4995, - SpvBuiltInBaryCoordSmoothCentroidAMD = 4996, - SpvBuiltInBaryCoordSmoothSampleAMD = 4997, - SpvBuiltInBaryCoordPullModelAMD = 4998, - SpvBuiltInFragStencilRefEXT = 5014, - SpvBuiltInViewportMaskNV = 5253, - SpvBuiltInSecondaryPositionNV = 5257, - SpvBuiltInSecondaryViewportMaskNV = 5258, - SpvBuiltInPositionPerViewNV = 5261, - SpvBuiltInViewportMaskPerViewNV = 5262, - SpvBuiltInMax = 0x7fffffff, -} SpvBuiltIn; - -typedef enum SpvSelectionControlShift_ { - SpvSelectionControlFlattenShift = 0, - SpvSelectionControlDontFlattenShift = 1, - SpvSelectionControlMax = 0x7fffffff, -} SpvSelectionControlShift; - -typedef enum SpvSelectionControlMask_ { - SpvSelectionControlMaskNone = 0, - SpvSelectionControlFlattenMask = 0x00000001, - SpvSelectionControlDontFlattenMask = 0x00000002, -} SpvSelectionControlMask; - -typedef enum SpvLoopControlShift_ { - SpvLoopControlUnrollShift = 0, - SpvLoopControlDontUnrollShift = 1, - SpvLoopControlMax = 0x7fffffff, -} SpvLoopControlShift; - -typedef enum SpvLoopControlMask_ { - SpvLoopControlMaskNone = 0, - SpvLoopControlUnrollMask = 0x00000001, - SpvLoopControlDontUnrollMask = 0x00000002, -} SpvLoopControlMask; - -typedef enum SpvFunctionControlShift_ { - SpvFunctionControlInlineShift = 0, - SpvFunctionControlDontInlineShift = 1, - SpvFunctionControlPureShift = 2, - SpvFunctionControlConstShift = 3, - 
SpvFunctionControlMax = 0x7fffffff, -} SpvFunctionControlShift; - -typedef enum SpvFunctionControlMask_ { - SpvFunctionControlMaskNone = 0, - SpvFunctionControlInlineMask = 0x00000001, - SpvFunctionControlDontInlineMask = 0x00000002, - SpvFunctionControlPureMask = 0x00000004, - SpvFunctionControlConstMask = 0x00000008, -} SpvFunctionControlMask; - -typedef enum SpvMemorySemanticsShift_ { - SpvMemorySemanticsAcquireShift = 1, - SpvMemorySemanticsReleaseShift = 2, - SpvMemorySemanticsAcquireReleaseShift = 3, - SpvMemorySemanticsSequentiallyConsistentShift = 4, - SpvMemorySemanticsUniformMemoryShift = 6, - SpvMemorySemanticsSubgroupMemoryShift = 7, - SpvMemorySemanticsWorkgroupMemoryShift = 8, - SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, - SpvMemorySemanticsAtomicCounterMemoryShift = 10, - SpvMemorySemanticsImageMemoryShift = 11, - SpvMemorySemanticsMax = 0x7fffffff, -} SpvMemorySemanticsShift; - -typedef enum SpvMemorySemanticsMask_ { - SpvMemorySemanticsMaskNone = 0, - SpvMemorySemanticsAcquireMask = 0x00000002, - SpvMemorySemanticsReleaseMask = 0x00000004, - SpvMemorySemanticsAcquireReleaseMask = 0x00000008, - SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, - SpvMemorySemanticsUniformMemoryMask = 0x00000040, - SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, - SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, - SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, - SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, - SpvMemorySemanticsImageMemoryMask = 0x00000800, -} SpvMemorySemanticsMask; - -typedef enum SpvMemoryAccessShift_ { - SpvMemoryAccessVolatileShift = 0, - SpvMemoryAccessAlignedShift = 1, - SpvMemoryAccessNontemporalShift = 2, - SpvMemoryAccessMax = 0x7fffffff, -} SpvMemoryAccessShift; - -typedef enum SpvMemoryAccessMask_ { - SpvMemoryAccessMaskNone = 0, - SpvMemoryAccessVolatileMask = 0x00000001, - SpvMemoryAccessAlignedMask = 0x00000002, - SpvMemoryAccessNontemporalMask = 0x00000004, -} SpvMemoryAccessMask; - -typedef enum SpvScope_ { - SpvScopeCrossDevice = 0, - SpvScopeDevice = 1, - SpvScopeWorkgroup = 2, - SpvScopeSubgroup = 3, - SpvScopeInvocation = 4, - SpvScopeMax = 0x7fffffff, -} SpvScope; - -typedef enum SpvGroupOperation_ { - SpvGroupOperationReduce = 0, - SpvGroupOperationInclusiveScan = 1, - SpvGroupOperationExclusiveScan = 2, - SpvGroupOperationMax = 0x7fffffff, -} SpvGroupOperation; - -typedef enum SpvKernelEnqueueFlags_ { - SpvKernelEnqueueFlagsNoWait = 0, - SpvKernelEnqueueFlagsWaitKernel = 1, - SpvKernelEnqueueFlagsWaitWorkGroup = 2, - SpvKernelEnqueueFlagsMax = 0x7fffffff, -} SpvKernelEnqueueFlags; - -typedef enum SpvKernelProfilingInfoShift_ { - SpvKernelProfilingInfoCmdExecTimeShift = 0, - SpvKernelProfilingInfoMax = 0x7fffffff, -} SpvKernelProfilingInfoShift; - -typedef enum SpvKernelProfilingInfoMask_ { - SpvKernelProfilingInfoMaskNone = 0, - SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, -} SpvKernelProfilingInfoMask; - -typedef enum SpvCapability_ { - SpvCapabilityMatrix = 0, - SpvCapabilityShader = 1, - SpvCapabilityGeometry = 2, - SpvCapabilityTessellation = 3, - SpvCapabilityAddresses = 4, - SpvCapabilityLinkage = 5, - SpvCapabilityKernel = 6, - SpvCapabilityVector16 = 7, - SpvCapabilityFloat16Buffer = 8, - SpvCapabilityFloat16 = 9, - SpvCapabilityFloat64 = 10, - SpvCapabilityInt64 = 11, - SpvCapabilityInt64Atomics = 12, - SpvCapabilityImageBasic = 13, - SpvCapabilityImageReadWrite = 14, - SpvCapabilityImageMipmap = 15, - SpvCapabilityPipes = 17, - SpvCapabilityGroups = 18, - SpvCapabilityDeviceEnqueue = 19, - 
SpvCapabilityLiteralSampler = 20, - SpvCapabilityAtomicStorage = 21, - SpvCapabilityInt16 = 22, - SpvCapabilityTessellationPointSize = 23, - SpvCapabilityGeometryPointSize = 24, - SpvCapabilityImageGatherExtended = 25, - SpvCapabilityStorageImageMultisample = 27, - SpvCapabilityUniformBufferArrayDynamicIndexing = 28, - SpvCapabilitySampledImageArrayDynamicIndexing = 29, - SpvCapabilityStorageBufferArrayDynamicIndexing = 30, - SpvCapabilityStorageImageArrayDynamicIndexing = 31, - SpvCapabilityClipDistance = 32, - SpvCapabilityCullDistance = 33, - SpvCapabilityImageCubeArray = 34, - SpvCapabilitySampleRateShading = 35, - SpvCapabilityImageRect = 36, - SpvCapabilitySampledRect = 37, - SpvCapabilityGenericPointer = 38, - SpvCapabilityInt8 = 39, - SpvCapabilityInputAttachment = 40, - SpvCapabilitySparseResidency = 41, - SpvCapabilityMinLod = 42, - SpvCapabilitySampled1D = 43, - SpvCapabilityImage1D = 44, - SpvCapabilitySampledCubeArray = 45, - SpvCapabilitySampledBuffer = 46, - SpvCapabilityImageBuffer = 47, - SpvCapabilityImageMSArray = 48, - SpvCapabilityStorageImageExtendedFormats = 49, - SpvCapabilityImageQuery = 50, - SpvCapabilityDerivativeControl = 51, - SpvCapabilityInterpolationFunction = 52, - SpvCapabilityTransformFeedback = 53, - SpvCapabilityGeometryStreams = 54, - SpvCapabilityStorageImageReadWithoutFormat = 55, - SpvCapabilityStorageImageWriteWithoutFormat = 56, - SpvCapabilityMultiViewport = 57, - SpvCapabilitySubgroupBallotKHR = 4423, - SpvCapabilityDrawParameters = 4427, - SpvCapabilitySubgroupVoteKHR = 4431, - SpvCapabilityStorageBuffer16BitAccess = 4433, - SpvCapabilityStorageUniformBufferBlock16 = 4433, - SpvCapabilityStorageUniform16 = 4434, - SpvCapabilityUniformAndStorageBuffer16BitAccess = 4434, - SpvCapabilityStoragePushConstant16 = 4435, - SpvCapabilityStorageInputOutput16 = 4436, - SpvCapabilityDeviceGroup = 4437, - SpvCapabilityMultiView = 4439, - SpvCapabilityVariablePointersStorageBuffer = 4441, - SpvCapabilityVariablePointers = 4442, - SpvCapabilityAtomicStorageOps = 4445, - SpvCapabilitySampleMaskPostDepthCoverage = 4447, - SpvCapabilityImageGatherBiasLodAMD = 5009, - SpvCapabilityFragmentMaskAMD = 5010, - SpvCapabilityStencilExportEXT = 5013, - SpvCapabilityImageReadWriteLodAMD = 5015, - SpvCapabilitySampleMaskOverrideCoverageNV = 5249, - SpvCapabilityGeometryShaderPassthroughNV = 5251, - SpvCapabilityShaderViewportIndexLayerEXT = 5254, - SpvCapabilityShaderViewportIndexLayerNV = 5254, - SpvCapabilityShaderViewportMaskNV = 5255, - SpvCapabilityShaderStereoViewNV = 5259, - SpvCapabilityPerViewAttributesNV = 5260, - SpvCapabilitySubgroupShuffleINTEL = 5568, - SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, - SpvCapabilitySubgroupImageBlockIOINTEL = 5570, - SpvCapabilityMax = 0x7fffffff, -} SpvCapability; - -typedef enum SpvOp_ { - SpvOpNop = 0, - SpvOpUndef = 1, - SpvOpSourceContinued = 2, - SpvOpSource = 3, - SpvOpSourceExtension = 4, - SpvOpName = 5, - SpvOpMemberName = 6, - SpvOpString = 7, - SpvOpLine = 8, - SpvOpExtension = 10, - SpvOpExtInstImport = 11, - SpvOpExtInst = 12, - SpvOpMemoryModel = 14, - SpvOpEntryPoint = 15, - SpvOpExecutionMode = 16, - SpvOpCapability = 17, - SpvOpTypeVoid = 19, - SpvOpTypeBool = 20, - SpvOpTypeInt = 21, - SpvOpTypeFloat = 22, - SpvOpTypeVector = 23, - SpvOpTypeMatrix = 24, - SpvOpTypeImage = 25, - SpvOpTypeSampler = 26, - SpvOpTypeSampledImage = 27, - SpvOpTypeArray = 28, - SpvOpTypeRuntimeArray = 29, - SpvOpTypeStruct = 30, - SpvOpTypeOpaque = 31, - SpvOpTypePointer = 32, - SpvOpTypeFunction = 33, - SpvOpTypeEvent = 34, - 
SpvOpTypeDeviceEvent = 35, - SpvOpTypeReserveId = 36, - SpvOpTypeQueue = 37, - SpvOpTypePipe = 38, - SpvOpTypeForwardPointer = 39, - SpvOpConstantTrue = 41, - SpvOpConstantFalse = 42, - SpvOpConstant = 43, - SpvOpConstantComposite = 44, - SpvOpConstantSampler = 45, - SpvOpConstantNull = 46, - SpvOpSpecConstantTrue = 48, - SpvOpSpecConstantFalse = 49, - SpvOpSpecConstant = 50, - SpvOpSpecConstantComposite = 51, - SpvOpSpecConstantOp = 52, - SpvOpFunction = 54, - SpvOpFunctionParameter = 55, - SpvOpFunctionEnd = 56, - SpvOpFunctionCall = 57, - SpvOpVariable = 59, - SpvOpImageTexelPointer = 60, - SpvOpLoad = 61, - SpvOpStore = 62, - SpvOpCopyMemory = 63, - SpvOpCopyMemorySized = 64, - SpvOpAccessChain = 65, - SpvOpInBoundsAccessChain = 66, - SpvOpPtrAccessChain = 67, - SpvOpArrayLength = 68, - SpvOpGenericPtrMemSemantics = 69, - SpvOpInBoundsPtrAccessChain = 70, - SpvOpDecorate = 71, - SpvOpMemberDecorate = 72, - SpvOpDecorationGroup = 73, - SpvOpGroupDecorate = 74, - SpvOpGroupMemberDecorate = 75, - SpvOpVectorExtractDynamic = 77, - SpvOpVectorInsertDynamic = 78, - SpvOpVectorShuffle = 79, - SpvOpCompositeConstruct = 80, - SpvOpCompositeExtract = 81, - SpvOpCompositeInsert = 82, - SpvOpCopyObject = 83, - SpvOpTranspose = 84, - SpvOpSampledImage = 86, - SpvOpImageSampleImplicitLod = 87, - SpvOpImageSampleExplicitLod = 88, - SpvOpImageSampleDrefImplicitLod = 89, - SpvOpImageSampleDrefExplicitLod = 90, - SpvOpImageSampleProjImplicitLod = 91, - SpvOpImageSampleProjExplicitLod = 92, - SpvOpImageSampleProjDrefImplicitLod = 93, - SpvOpImageSampleProjDrefExplicitLod = 94, - SpvOpImageFetch = 95, - SpvOpImageGather = 96, - SpvOpImageDrefGather = 97, - SpvOpImageRead = 98, - SpvOpImageWrite = 99, - SpvOpImage = 100, - SpvOpImageQueryFormat = 101, - SpvOpImageQueryOrder = 102, - SpvOpImageQuerySizeLod = 103, - SpvOpImageQuerySize = 104, - SpvOpImageQueryLod = 105, - SpvOpImageQueryLevels = 106, - SpvOpImageQuerySamples = 107, - SpvOpConvertFToU = 109, - SpvOpConvertFToS = 110, - SpvOpConvertSToF = 111, - SpvOpConvertUToF = 112, - SpvOpUConvert = 113, - SpvOpSConvert = 114, - SpvOpFConvert = 115, - SpvOpQuantizeToF16 = 116, - SpvOpConvertPtrToU = 117, - SpvOpSatConvertSToU = 118, - SpvOpSatConvertUToS = 119, - SpvOpConvertUToPtr = 120, - SpvOpPtrCastToGeneric = 121, - SpvOpGenericCastToPtr = 122, - SpvOpGenericCastToPtrExplicit = 123, - SpvOpBitcast = 124, - SpvOpSNegate = 126, - SpvOpFNegate = 127, - SpvOpIAdd = 128, - SpvOpFAdd = 129, - SpvOpISub = 130, - SpvOpFSub = 131, - SpvOpIMul = 132, - SpvOpFMul = 133, - SpvOpUDiv = 134, - SpvOpSDiv = 135, - SpvOpFDiv = 136, - SpvOpUMod = 137, - SpvOpSRem = 138, - SpvOpSMod = 139, - SpvOpFRem = 140, - SpvOpFMod = 141, - SpvOpVectorTimesScalar = 142, - SpvOpMatrixTimesScalar = 143, - SpvOpVectorTimesMatrix = 144, - SpvOpMatrixTimesVector = 145, - SpvOpMatrixTimesMatrix = 146, - SpvOpOuterProduct = 147, - SpvOpDot = 148, - SpvOpIAddCarry = 149, - SpvOpISubBorrow = 150, - SpvOpUMulExtended = 151, - SpvOpSMulExtended = 152, - SpvOpAny = 154, - SpvOpAll = 155, - SpvOpIsNan = 156, - SpvOpIsInf = 157, - SpvOpIsFinite = 158, - SpvOpIsNormal = 159, - SpvOpSignBitSet = 160, - SpvOpLessOrGreater = 161, - SpvOpOrdered = 162, - SpvOpUnordered = 163, - SpvOpLogicalEqual = 164, - SpvOpLogicalNotEqual = 165, - SpvOpLogicalOr = 166, - SpvOpLogicalAnd = 167, - SpvOpLogicalNot = 168, - SpvOpSelect = 169, - SpvOpIEqual = 170, - SpvOpINotEqual = 171, - SpvOpUGreaterThan = 172, - SpvOpSGreaterThan = 173, - SpvOpUGreaterThanEqual = 174, - SpvOpSGreaterThanEqual = 175, - 
SpvOpULessThan = 176, - SpvOpSLessThan = 177, - SpvOpULessThanEqual = 178, - SpvOpSLessThanEqual = 179, - SpvOpFOrdEqual = 180, - SpvOpFUnordEqual = 181, - SpvOpFOrdNotEqual = 182, - SpvOpFUnordNotEqual = 183, - SpvOpFOrdLessThan = 184, - SpvOpFUnordLessThan = 185, - SpvOpFOrdGreaterThan = 186, - SpvOpFUnordGreaterThan = 187, - SpvOpFOrdLessThanEqual = 188, - SpvOpFUnordLessThanEqual = 189, - SpvOpFOrdGreaterThanEqual = 190, - SpvOpFUnordGreaterThanEqual = 191, - SpvOpShiftRightLogical = 194, - SpvOpShiftRightArithmetic = 195, - SpvOpShiftLeftLogical = 196, - SpvOpBitwiseOr = 197, - SpvOpBitwiseXor = 198, - SpvOpBitwiseAnd = 199, - SpvOpNot = 200, - SpvOpBitFieldInsert = 201, - SpvOpBitFieldSExtract = 202, - SpvOpBitFieldUExtract = 203, - SpvOpBitReverse = 204, - SpvOpBitCount = 205, - SpvOpDPdx = 207, - SpvOpDPdy = 208, - SpvOpFwidth = 209, - SpvOpDPdxFine = 210, - SpvOpDPdyFine = 211, - SpvOpFwidthFine = 212, - SpvOpDPdxCoarse = 213, - SpvOpDPdyCoarse = 214, - SpvOpFwidthCoarse = 215, - SpvOpEmitVertex = 218, - SpvOpEndPrimitive = 219, - SpvOpEmitStreamVertex = 220, - SpvOpEndStreamPrimitive = 221, - SpvOpControlBarrier = 224, - SpvOpMemoryBarrier = 225, - SpvOpAtomicLoad = 227, - SpvOpAtomicStore = 228, - SpvOpAtomicExchange = 229, - SpvOpAtomicCompareExchange = 230, - SpvOpAtomicCompareExchangeWeak = 231, - SpvOpAtomicIIncrement = 232, - SpvOpAtomicIDecrement = 233, - SpvOpAtomicIAdd = 234, - SpvOpAtomicISub = 235, - SpvOpAtomicSMin = 236, - SpvOpAtomicUMin = 237, - SpvOpAtomicSMax = 238, - SpvOpAtomicUMax = 239, - SpvOpAtomicAnd = 240, - SpvOpAtomicOr = 241, - SpvOpAtomicXor = 242, - SpvOpPhi = 245, - SpvOpLoopMerge = 246, - SpvOpSelectionMerge = 247, - SpvOpLabel = 248, - SpvOpBranch = 249, - SpvOpBranchConditional = 250, - SpvOpSwitch = 251, - SpvOpKill = 252, - SpvOpReturn = 253, - SpvOpReturnValue = 254, - SpvOpUnreachable = 255, - SpvOpLifetimeStart = 256, - SpvOpLifetimeStop = 257, - SpvOpGroupAsyncCopy = 259, - SpvOpGroupWaitEvents = 260, - SpvOpGroupAll = 261, - SpvOpGroupAny = 262, - SpvOpGroupBroadcast = 263, - SpvOpGroupIAdd = 264, - SpvOpGroupFAdd = 265, - SpvOpGroupFMin = 266, - SpvOpGroupUMin = 267, - SpvOpGroupSMin = 268, - SpvOpGroupFMax = 269, - SpvOpGroupUMax = 270, - SpvOpGroupSMax = 271, - SpvOpReadPipe = 274, - SpvOpWritePipe = 275, - SpvOpReservedReadPipe = 276, - SpvOpReservedWritePipe = 277, - SpvOpReserveReadPipePackets = 278, - SpvOpReserveWritePipePackets = 279, - SpvOpCommitReadPipe = 280, - SpvOpCommitWritePipe = 281, - SpvOpIsValidReserveId = 282, - SpvOpGetNumPipePackets = 283, - SpvOpGetMaxPipePackets = 284, - SpvOpGroupReserveReadPipePackets = 285, - SpvOpGroupReserveWritePipePackets = 286, - SpvOpGroupCommitReadPipe = 287, - SpvOpGroupCommitWritePipe = 288, - SpvOpEnqueueMarker = 291, - SpvOpEnqueueKernel = 292, - SpvOpGetKernelNDrangeSubGroupCount = 293, - SpvOpGetKernelNDrangeMaxSubGroupSize = 294, - SpvOpGetKernelWorkGroupSize = 295, - SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, - SpvOpRetainEvent = 297, - SpvOpReleaseEvent = 298, - SpvOpCreateUserEvent = 299, - SpvOpIsValidEvent = 300, - SpvOpSetUserEventStatus = 301, - SpvOpCaptureEventProfilingInfo = 302, - SpvOpGetDefaultQueue = 303, - SpvOpBuildNDRange = 304, - SpvOpImageSparseSampleImplicitLod = 305, - SpvOpImageSparseSampleExplicitLod = 306, - SpvOpImageSparseSampleDrefImplicitLod = 307, - SpvOpImageSparseSampleDrefExplicitLod = 308, - SpvOpImageSparseSampleProjImplicitLod = 309, - SpvOpImageSparseSampleProjExplicitLod = 310, - SpvOpImageSparseSampleProjDrefImplicitLod = 311, - 
SpvOpImageSparseSampleProjDrefExplicitLod = 312, - SpvOpImageSparseFetch = 313, - SpvOpImageSparseGather = 314, - SpvOpImageSparseDrefGather = 315, - SpvOpImageSparseTexelsResident = 316, - SpvOpNoLine = 317, - SpvOpAtomicFlagTestAndSet = 318, - SpvOpAtomicFlagClear = 319, - SpvOpImageSparseRead = 320, - SpvOpDecorateId = 332, - SpvOpSubgroupBallotKHR = 4421, - SpvOpSubgroupFirstInvocationKHR = 4422, - SpvOpSubgroupAllKHR = 4428, - SpvOpSubgroupAnyKHR = 4429, - SpvOpSubgroupAllEqualKHR = 4430, - SpvOpSubgroupReadInvocationKHR = 4432, - SpvOpGroupIAddNonUniformAMD = 5000, - SpvOpGroupFAddNonUniformAMD = 5001, - SpvOpGroupFMinNonUniformAMD = 5002, - SpvOpGroupUMinNonUniformAMD = 5003, - SpvOpGroupSMinNonUniformAMD = 5004, - SpvOpGroupFMaxNonUniformAMD = 5005, - SpvOpGroupUMaxNonUniformAMD = 5006, - SpvOpGroupSMaxNonUniformAMD = 5007, - SpvOpFragmentMaskFetchAMD = 5011, - SpvOpFragmentFetchAMD = 5012, - SpvOpSubgroupShuffleINTEL = 5571, - SpvOpSubgroupShuffleDownINTEL = 5572, - SpvOpSubgroupShuffleUpINTEL = 5573, - SpvOpSubgroupShuffleXorINTEL = 5574, - SpvOpSubgroupBlockReadINTEL = 5575, - SpvOpSubgroupBlockWriteINTEL = 5576, - SpvOpSubgroupImageBlockReadINTEL = 5577, - SpvOpSubgroupImageBlockWriteINTEL = 5578, - SpvOpDecorateStringGOOGLE = 5632, - SpvOpMemberDecorateStringGOOGLE = 5633, - SpvOpMax = 0x7fffffff, -} SpvOp; - -#endif // #ifndef spirv_H - diff --git a/dependencies/spirv/include/spirv/1.6/GLSL.std.450.h b/dependencies/spirv/include/spirv/1.6/GLSL.std.450.h new file mode 100644 index 000000000000..54cc00e9a888 --- /dev/null +++ b/dependencies/spirv/include/spirv/1.6/GLSL.std.450.h @@ -0,0 +1,131 @@ +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. 
+*/ + +#ifndef GLSLstd450_H +#define GLSLstd450_H + +static const int GLSLstd450Version = 100; +static const int GLSLstd450Revision = 3; + +enum GLSLstd450 { + GLSLstd450Bad = 0, // Don't use + + GLSLstd450Round = 1, + GLSLstd450RoundEven = 2, + GLSLstd450Trunc = 3, + GLSLstd450FAbs = 4, + GLSLstd450SAbs = 5, + GLSLstd450FSign = 6, + GLSLstd450SSign = 7, + GLSLstd450Floor = 8, + GLSLstd450Ceil = 9, + GLSLstd450Fract = 10, + + GLSLstd450Radians = 11, + GLSLstd450Degrees = 12, + GLSLstd450Sin = 13, + GLSLstd450Cos = 14, + GLSLstd450Tan = 15, + GLSLstd450Asin = 16, + GLSLstd450Acos = 17, + GLSLstd450Atan = 18, + GLSLstd450Sinh = 19, + GLSLstd450Cosh = 20, + GLSLstd450Tanh = 21, + GLSLstd450Asinh = 22, + GLSLstd450Acosh = 23, + GLSLstd450Atanh = 24, + GLSLstd450Atan2 = 25, + + GLSLstd450Pow = 26, + GLSLstd450Exp = 27, + GLSLstd450Log = 28, + GLSLstd450Exp2 = 29, + GLSLstd450Log2 = 30, + GLSLstd450Sqrt = 31, + GLSLstd450InverseSqrt = 32, + + GLSLstd450Determinant = 33, + GLSLstd450MatrixInverse = 34, + + GLSLstd450Modf = 35, // second operand needs an OpVariable to write to + GLSLstd450ModfStruct = 36, // no OpVariable operand + GLSLstd450FMin = 37, + GLSLstd450UMin = 38, + GLSLstd450SMin = 39, + GLSLstd450FMax = 40, + GLSLstd450UMax = 41, + GLSLstd450SMax = 42, + GLSLstd450FClamp = 43, + GLSLstd450UClamp = 44, + GLSLstd450SClamp = 45, + GLSLstd450FMix = 46, + GLSLstd450IMix = 47, // Reserved + GLSLstd450Step = 48, + GLSLstd450SmoothStep = 49, + + GLSLstd450Fma = 50, + GLSLstd450Frexp = 51, // second operand needs an OpVariable to write to + GLSLstd450FrexpStruct = 52, // no OpVariable operand + GLSLstd450Ldexp = 53, + + GLSLstd450PackSnorm4x8 = 54, + GLSLstd450PackUnorm4x8 = 55, + GLSLstd450PackSnorm2x16 = 56, + GLSLstd450PackUnorm2x16 = 57, + GLSLstd450PackHalf2x16 = 58, + GLSLstd450PackDouble2x32 = 59, + GLSLstd450UnpackSnorm2x16 = 60, + GLSLstd450UnpackUnorm2x16 = 61, + GLSLstd450UnpackHalf2x16 = 62, + GLSLstd450UnpackSnorm4x8 = 63, + GLSLstd450UnpackUnorm4x8 = 64, + GLSLstd450UnpackDouble2x32 = 65, + + GLSLstd450Length = 66, + GLSLstd450Distance = 67, + GLSLstd450Cross = 68, + GLSLstd450Normalize = 69, + GLSLstd450FaceForward = 70, + GLSLstd450Reflect = 71, + GLSLstd450Refract = 72, + + GLSLstd450FindILsb = 73, + GLSLstd450FindSMsb = 74, + GLSLstd450FindUMsb = 75, + + GLSLstd450InterpolateAtCentroid = 76, + GLSLstd450InterpolateAtSample = 77, + GLSLstd450InterpolateAtOffset = 78, + + GLSLstd450NMin = 79, + GLSLstd450NMax = 80, + GLSLstd450NClamp = 81, + + GLSLstd450Count +}; + +#endif // #ifndef GLSLstd450_H diff --git a/dependencies/spirv/include/spirv/1.6/spirv.h b/dependencies/spirv/include/spirv/1.6/spirv.h new file mode 100644 index 000000000000..73d6c76614a2 --- /dev/null +++ b/dependencies/spirv/include/spirv/1.6/spirv.h @@ -0,0 +1,2576 @@ +/* +** Copyright (c) 2014-2020 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. 
+** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ + +/* +** This header is automatically generated by the same tool that creates +** the Binary Section of the SPIR-V specification. +*/ + +/* +** Enumeration tokens for SPIR-V, in various styles: +** C, C++, C++11, JSON, Lua, Python, C#, D, Beef +** +** - C will have tokens with a "Spv" prefix, e.g.: SpvSourceLanguageGLSL +** - C++ will have tokens in the "spv" name space, e.g.: spv::SourceLanguageGLSL +** - C++11 will use enum classes in the spv namespace, e.g.: spv::SourceLanguage::GLSL +** - Lua will use tables, e.g.: spv.SourceLanguage.GLSL +** - Python will use dictionaries, e.g.: spv['SourceLanguage']['GLSL'] +** - C# will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL +** - D will have tokens under the "spv" module, e.g: spv.SourceLanguage.GLSL +** - Beef will use enum classes in the Specification class located in the "Spv" namespace, +** e.g.: Spv.Specification.SourceLanguage.GLSL +** +** Some tokens act like mask values, which can be OR'd together, +** while others are mutually exclusive. The mask-like ones have +** "Mask" in their name, and a parallel enum that has the shift +** amount (1 << x) for each corresponding enumerant. 
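+**
+** For example, a minimal sketch (assuming only a C translation unit that
+** includes this header; "first_word" is a hypothetical word taken from a
+** SPIR-V binary, not an API of this header):
+**
+**     // Mask-style enumerants combine with bitwise OR...
+**     SpvImageOperandsMask operands = (SpvImageOperandsMask)
+**         (SpvImageOperandsLodMask | SpvImageOperandsConstOffsetMask);
+**
+**     // ...and each mask bit is 1 shifted by the parallel *Shift enumerant.
+**     SpvImageOperandsMask same_operands = (SpvImageOperandsMask)
+**         ((1u << SpvImageOperandsLodShift) |
+**          (1u << SpvImageOperandsConstOffsetShift));
+**
+**     // Relatedly, the first word of every SPIR-V instruction packs the
+**     // word count above the opcode; split it with SpvOpCodeMask and
+**     // SpvWordCountShift, declared below.
+**     unsigned int first_word = 0x00040015;  // hypothetical: "OpTypeInt 32 0"
+**     SpvOp opcode = (SpvOp)(first_word & SpvOpCodeMask);         // SpvOpTypeInt
+**     unsigned int word_count = first_word >> SpvWordCountShift;  // 4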
+*/ + +#ifndef spirv_H +#define spirv_H + +typedef unsigned int SpvId; + +#define SPV_VERSION 0x10600 +#define SPV_REVISION 1 + +static const unsigned int SpvMagicNumber = 0x07230203; +static const unsigned int SpvVersion = 0x00010600; +static const unsigned int SpvRevision = 1; +static const unsigned int SpvOpCodeMask = 0xffff; +static const unsigned int SpvWordCountShift = 16; + +typedef enum SpvSourceLanguage_ { + SpvSourceLanguageUnknown = 0, + SpvSourceLanguageESSL = 1, + SpvSourceLanguageGLSL = 2, + SpvSourceLanguageOpenCL_C = 3, + SpvSourceLanguageOpenCL_CPP = 4, + SpvSourceLanguageHLSL = 5, + SpvSourceLanguageCPP_for_OpenCL = 6, + SpvSourceLanguageSYCL = 7, + SpvSourceLanguageMax = 0x7fffffff, +} SpvSourceLanguage; + +typedef enum SpvExecutionModel_ { + SpvExecutionModelVertex = 0, + SpvExecutionModelTessellationControl = 1, + SpvExecutionModelTessellationEvaluation = 2, + SpvExecutionModelGeometry = 3, + SpvExecutionModelFragment = 4, + SpvExecutionModelGLCompute = 5, + SpvExecutionModelKernel = 6, + SpvExecutionModelTaskNV = 5267, + SpvExecutionModelMeshNV = 5268, + SpvExecutionModelRayGenerationKHR = 5313, + SpvExecutionModelRayGenerationNV = 5313, + SpvExecutionModelIntersectionKHR = 5314, + SpvExecutionModelIntersectionNV = 5314, + SpvExecutionModelAnyHitKHR = 5315, + SpvExecutionModelAnyHitNV = 5315, + SpvExecutionModelClosestHitKHR = 5316, + SpvExecutionModelClosestHitNV = 5316, + SpvExecutionModelMissKHR = 5317, + SpvExecutionModelMissNV = 5317, + SpvExecutionModelCallableKHR = 5318, + SpvExecutionModelCallableNV = 5318, + SpvExecutionModelTaskEXT = 5364, + SpvExecutionModelMeshEXT = 5365, + SpvExecutionModelMax = 0x7fffffff, +} SpvExecutionModel; + +typedef enum SpvAddressingModel_ { + SpvAddressingModelLogical = 0, + SpvAddressingModelPhysical32 = 1, + SpvAddressingModelPhysical64 = 2, + SpvAddressingModelPhysicalStorageBuffer64 = 5348, + SpvAddressingModelPhysicalStorageBuffer64EXT = 5348, + SpvAddressingModelMax = 0x7fffffff, +} SpvAddressingModel; + +typedef enum SpvMemoryModel_ { + SpvMemoryModelSimple = 0, + SpvMemoryModelGLSL450 = 1, + SpvMemoryModelOpenCL = 2, + SpvMemoryModelVulkan = 3, + SpvMemoryModelVulkanKHR = 3, + SpvMemoryModelMax = 0x7fffffff, +} SpvMemoryModel; + +typedef enum SpvExecutionMode_ { + SpvExecutionModeInvocations = 0, + SpvExecutionModeSpacingEqual = 1, + SpvExecutionModeSpacingFractionalEven = 2, + SpvExecutionModeSpacingFractionalOdd = 3, + SpvExecutionModeVertexOrderCw = 4, + SpvExecutionModeVertexOrderCcw = 5, + SpvExecutionModePixelCenterInteger = 6, + SpvExecutionModeOriginUpperLeft = 7, + SpvExecutionModeOriginLowerLeft = 8, + SpvExecutionModeEarlyFragmentTests = 9, + SpvExecutionModePointMode = 10, + SpvExecutionModeXfb = 11, + SpvExecutionModeDepthReplacing = 12, + SpvExecutionModeDepthGreater = 14, + SpvExecutionModeDepthLess = 15, + SpvExecutionModeDepthUnchanged = 16, + SpvExecutionModeLocalSize = 17, + SpvExecutionModeLocalSizeHint = 18, + SpvExecutionModeInputPoints = 19, + SpvExecutionModeInputLines = 20, + SpvExecutionModeInputLinesAdjacency = 21, + SpvExecutionModeTriangles = 22, + SpvExecutionModeInputTrianglesAdjacency = 23, + SpvExecutionModeQuads = 24, + SpvExecutionModeIsolines = 25, + SpvExecutionModeOutputVertices = 26, + SpvExecutionModeOutputPoints = 27, + SpvExecutionModeOutputLineStrip = 28, + SpvExecutionModeOutputTriangleStrip = 29, + SpvExecutionModeVecTypeHint = 30, + SpvExecutionModeContractionOff = 31, + SpvExecutionModeInitializer = 33, + SpvExecutionModeFinalizer = 34, + SpvExecutionModeSubgroupSize = 35, + 
SpvExecutionModeSubgroupsPerWorkgroup = 36, + SpvExecutionModeSubgroupsPerWorkgroupId = 37, + SpvExecutionModeLocalSizeId = 38, + SpvExecutionModeLocalSizeHintId = 39, + SpvExecutionModeSubgroupUniformControlFlowKHR = 4421, + SpvExecutionModePostDepthCoverage = 4446, + SpvExecutionModeDenormPreserve = 4459, + SpvExecutionModeDenormFlushToZero = 4460, + SpvExecutionModeSignedZeroInfNanPreserve = 4461, + SpvExecutionModeRoundingModeRTE = 4462, + SpvExecutionModeRoundingModeRTZ = 4463, + SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017, + SpvExecutionModeStencilRefReplacingEXT = 5027, + SpvExecutionModeStencilRefUnchangedFrontAMD = 5079, + SpvExecutionModeStencilRefGreaterFrontAMD = 5080, + SpvExecutionModeStencilRefLessFrontAMD = 5081, + SpvExecutionModeStencilRefUnchangedBackAMD = 5082, + SpvExecutionModeStencilRefGreaterBackAMD = 5083, + SpvExecutionModeStencilRefLessBackAMD = 5084, + SpvExecutionModeOutputLinesEXT = 5269, + SpvExecutionModeOutputLinesNV = 5269, + SpvExecutionModeOutputPrimitivesEXT = 5270, + SpvExecutionModeOutputPrimitivesNV = 5270, + SpvExecutionModeDerivativeGroupQuadsNV = 5289, + SpvExecutionModeDerivativeGroupLinearNV = 5290, + SpvExecutionModeOutputTrianglesEXT = 5298, + SpvExecutionModeOutputTrianglesNV = 5298, + SpvExecutionModePixelInterlockOrderedEXT = 5366, + SpvExecutionModePixelInterlockUnorderedEXT = 5367, + SpvExecutionModeSampleInterlockOrderedEXT = 5368, + SpvExecutionModeSampleInterlockUnorderedEXT = 5369, + SpvExecutionModeShadingRateInterlockOrderedEXT = 5370, + SpvExecutionModeShadingRateInterlockUnorderedEXT = 5371, + SpvExecutionModeSharedLocalMemorySizeINTEL = 5618, + SpvExecutionModeRoundingModeRTPINTEL = 5620, + SpvExecutionModeRoundingModeRTNINTEL = 5621, + SpvExecutionModeFloatingPointModeALTINTEL = 5622, + SpvExecutionModeFloatingPointModeIEEEINTEL = 5623, + SpvExecutionModeMaxWorkgroupSizeINTEL = 5893, + SpvExecutionModeMaxWorkDimINTEL = 5894, + SpvExecutionModeNoGlobalOffsetINTEL = 5895, + SpvExecutionModeNumSIMDWorkitemsINTEL = 5896, + SpvExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + SpvExecutionModeNamedBarrierCountINTEL = 6417, + SpvExecutionModeMax = 0x7fffffff, +} SpvExecutionMode; + +typedef enum SpvStorageClass_ { + SpvStorageClassUniformConstant = 0, + SpvStorageClassInput = 1, + SpvStorageClassUniform = 2, + SpvStorageClassOutput = 3, + SpvStorageClassWorkgroup = 4, + SpvStorageClassCrossWorkgroup = 5, + SpvStorageClassPrivate = 6, + SpvStorageClassFunction = 7, + SpvStorageClassGeneric = 8, + SpvStorageClassPushConstant = 9, + SpvStorageClassAtomicCounter = 10, + SpvStorageClassImage = 11, + SpvStorageClassStorageBuffer = 12, + SpvStorageClassCallableDataKHR = 5328, + SpvStorageClassCallableDataNV = 5328, + SpvStorageClassIncomingCallableDataKHR = 5329, + SpvStorageClassIncomingCallableDataNV = 5329, + SpvStorageClassRayPayloadKHR = 5338, + SpvStorageClassRayPayloadNV = 5338, + SpvStorageClassHitAttributeKHR = 5339, + SpvStorageClassHitAttributeNV = 5339, + SpvStorageClassIncomingRayPayloadKHR = 5342, + SpvStorageClassIncomingRayPayloadNV = 5342, + SpvStorageClassShaderRecordBufferKHR = 5343, + SpvStorageClassShaderRecordBufferNV = 5343, + SpvStorageClassPhysicalStorageBuffer = 5349, + SpvStorageClassPhysicalStorageBufferEXT = 5349, + SpvStorageClassTaskPayloadWorkgroupEXT = 5402, + SpvStorageClassCodeSectionINTEL = 5605, + SpvStorageClassDeviceOnlyINTEL = 5936, + SpvStorageClassHostOnlyINTEL = 5937, + SpvStorageClassMax = 0x7fffffff, +} SpvStorageClass; + +typedef enum SpvDim_ { + SpvDim1D = 0, + SpvDim2D = 1, + SpvDim3D = 
2, + SpvDimCube = 3, + SpvDimRect = 4, + SpvDimBuffer = 5, + SpvDimSubpassData = 6, + SpvDimMax = 0x7fffffff, +} SpvDim; + +typedef enum SpvSamplerAddressingMode_ { + SpvSamplerAddressingModeNone = 0, + SpvSamplerAddressingModeClampToEdge = 1, + SpvSamplerAddressingModeClamp = 2, + SpvSamplerAddressingModeRepeat = 3, + SpvSamplerAddressingModeRepeatMirrored = 4, + SpvSamplerAddressingModeMax = 0x7fffffff, +} SpvSamplerAddressingMode; + +typedef enum SpvSamplerFilterMode_ { + SpvSamplerFilterModeNearest = 0, + SpvSamplerFilterModeLinear = 1, + SpvSamplerFilterModeMax = 0x7fffffff, +} SpvSamplerFilterMode; + +typedef enum SpvImageFormat_ { + SpvImageFormatUnknown = 0, + SpvImageFormatRgba32f = 1, + SpvImageFormatRgba16f = 2, + SpvImageFormatR32f = 3, + SpvImageFormatRgba8 = 4, + SpvImageFormatRgba8Snorm = 5, + SpvImageFormatRg32f = 6, + SpvImageFormatRg16f = 7, + SpvImageFormatR11fG11fB10f = 8, + SpvImageFormatR16f = 9, + SpvImageFormatRgba16 = 10, + SpvImageFormatRgb10A2 = 11, + SpvImageFormatRg16 = 12, + SpvImageFormatRg8 = 13, + SpvImageFormatR16 = 14, + SpvImageFormatR8 = 15, + SpvImageFormatRgba16Snorm = 16, + SpvImageFormatRg16Snorm = 17, + SpvImageFormatRg8Snorm = 18, + SpvImageFormatR16Snorm = 19, + SpvImageFormatR8Snorm = 20, + SpvImageFormatRgba32i = 21, + SpvImageFormatRgba16i = 22, + SpvImageFormatRgba8i = 23, + SpvImageFormatR32i = 24, + SpvImageFormatRg32i = 25, + SpvImageFormatRg16i = 26, + SpvImageFormatRg8i = 27, + SpvImageFormatR16i = 28, + SpvImageFormatR8i = 29, + SpvImageFormatRgba32ui = 30, + SpvImageFormatRgba16ui = 31, + SpvImageFormatRgba8ui = 32, + SpvImageFormatR32ui = 33, + SpvImageFormatRgb10a2ui = 34, + SpvImageFormatRg32ui = 35, + SpvImageFormatRg16ui = 36, + SpvImageFormatRg8ui = 37, + SpvImageFormatR16ui = 38, + SpvImageFormatR8ui = 39, + SpvImageFormatR64ui = 40, + SpvImageFormatR64i = 41, + SpvImageFormatMax = 0x7fffffff, +} SpvImageFormat; + +typedef enum SpvImageChannelOrder_ { + SpvImageChannelOrderR = 0, + SpvImageChannelOrderA = 1, + SpvImageChannelOrderRG = 2, + SpvImageChannelOrderRA = 3, + SpvImageChannelOrderRGB = 4, + SpvImageChannelOrderRGBA = 5, + SpvImageChannelOrderBGRA = 6, + SpvImageChannelOrderARGB = 7, + SpvImageChannelOrderIntensity = 8, + SpvImageChannelOrderLuminance = 9, + SpvImageChannelOrderRx = 10, + SpvImageChannelOrderRGx = 11, + SpvImageChannelOrderRGBx = 12, + SpvImageChannelOrderDepth = 13, + SpvImageChannelOrderDepthStencil = 14, + SpvImageChannelOrdersRGB = 15, + SpvImageChannelOrdersRGBx = 16, + SpvImageChannelOrdersRGBA = 17, + SpvImageChannelOrdersBGRA = 18, + SpvImageChannelOrderABGR = 19, + SpvImageChannelOrderMax = 0x7fffffff, +} SpvImageChannelOrder; + +typedef enum SpvImageChannelDataType_ { + SpvImageChannelDataTypeSnormInt8 = 0, + SpvImageChannelDataTypeSnormInt16 = 1, + SpvImageChannelDataTypeUnormInt8 = 2, + SpvImageChannelDataTypeUnormInt16 = 3, + SpvImageChannelDataTypeUnormShort565 = 4, + SpvImageChannelDataTypeUnormShort555 = 5, + SpvImageChannelDataTypeUnormInt101010 = 6, + SpvImageChannelDataTypeSignedInt8 = 7, + SpvImageChannelDataTypeSignedInt16 = 8, + SpvImageChannelDataTypeSignedInt32 = 9, + SpvImageChannelDataTypeUnsignedInt8 = 10, + SpvImageChannelDataTypeUnsignedInt16 = 11, + SpvImageChannelDataTypeUnsignedInt32 = 12, + SpvImageChannelDataTypeHalfFloat = 13, + SpvImageChannelDataTypeFloat = 14, + SpvImageChannelDataTypeUnormInt24 = 15, + SpvImageChannelDataTypeUnormInt101010_2 = 16, + SpvImageChannelDataTypeMax = 0x7fffffff, +} SpvImageChannelDataType; + +typedef enum SpvImageOperandsShift_ { + 
SpvImageOperandsBiasShift = 0, + SpvImageOperandsLodShift = 1, + SpvImageOperandsGradShift = 2, + SpvImageOperandsConstOffsetShift = 3, + SpvImageOperandsOffsetShift = 4, + SpvImageOperandsConstOffsetsShift = 5, + SpvImageOperandsSampleShift = 6, + SpvImageOperandsMinLodShift = 7, + SpvImageOperandsMakeTexelAvailableShift = 8, + SpvImageOperandsMakeTexelAvailableKHRShift = 8, + SpvImageOperandsMakeTexelVisibleShift = 9, + SpvImageOperandsMakeTexelVisibleKHRShift = 9, + SpvImageOperandsNonPrivateTexelShift = 10, + SpvImageOperandsNonPrivateTexelKHRShift = 10, + SpvImageOperandsVolatileTexelShift = 11, + SpvImageOperandsVolatileTexelKHRShift = 11, + SpvImageOperandsSignExtendShift = 12, + SpvImageOperandsZeroExtendShift = 13, + SpvImageOperandsNontemporalShift = 14, + SpvImageOperandsOffsetsShift = 16, + SpvImageOperandsMax = 0x7fffffff, +} SpvImageOperandsShift; + +typedef enum SpvImageOperandsMask_ { + SpvImageOperandsMaskNone = 0, + SpvImageOperandsBiasMask = 0x00000001, + SpvImageOperandsLodMask = 0x00000002, + SpvImageOperandsGradMask = 0x00000004, + SpvImageOperandsConstOffsetMask = 0x00000008, + SpvImageOperandsOffsetMask = 0x00000010, + SpvImageOperandsConstOffsetsMask = 0x00000020, + SpvImageOperandsSampleMask = 0x00000040, + SpvImageOperandsMinLodMask = 0x00000080, + SpvImageOperandsMakeTexelAvailableMask = 0x00000100, + SpvImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + SpvImageOperandsMakeTexelVisibleMask = 0x00000200, + SpvImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + SpvImageOperandsNonPrivateTexelMask = 0x00000400, + SpvImageOperandsNonPrivateTexelKHRMask = 0x00000400, + SpvImageOperandsVolatileTexelMask = 0x00000800, + SpvImageOperandsVolatileTexelKHRMask = 0x00000800, + SpvImageOperandsSignExtendMask = 0x00001000, + SpvImageOperandsZeroExtendMask = 0x00002000, + SpvImageOperandsNontemporalMask = 0x00004000, + SpvImageOperandsOffsetsMask = 0x00010000, +} SpvImageOperandsMask; + +typedef enum SpvFPFastMathModeShift_ { + SpvFPFastMathModeNotNaNShift = 0, + SpvFPFastMathModeNotInfShift = 1, + SpvFPFastMathModeNSZShift = 2, + SpvFPFastMathModeAllowRecipShift = 3, + SpvFPFastMathModeFastShift = 4, + SpvFPFastMathModeAllowContractFastINTELShift = 16, + SpvFPFastMathModeAllowReassocINTELShift = 17, + SpvFPFastMathModeMax = 0x7fffffff, +} SpvFPFastMathModeShift; + +typedef enum SpvFPFastMathModeMask_ { + SpvFPFastMathModeMaskNone = 0, + SpvFPFastMathModeNotNaNMask = 0x00000001, + SpvFPFastMathModeNotInfMask = 0x00000002, + SpvFPFastMathModeNSZMask = 0x00000004, + SpvFPFastMathModeAllowRecipMask = 0x00000008, + SpvFPFastMathModeFastMask = 0x00000010, + SpvFPFastMathModeAllowContractFastINTELMask = 0x00010000, + SpvFPFastMathModeAllowReassocINTELMask = 0x00020000, +} SpvFPFastMathModeMask; + +typedef enum SpvFPRoundingMode_ { + SpvFPRoundingModeRTE = 0, + SpvFPRoundingModeRTZ = 1, + SpvFPRoundingModeRTP = 2, + SpvFPRoundingModeRTN = 3, + SpvFPRoundingModeMax = 0x7fffffff, +} SpvFPRoundingMode; + +typedef enum SpvLinkageType_ { + SpvLinkageTypeExport = 0, + SpvLinkageTypeImport = 1, + SpvLinkageTypeLinkOnceODR = 2, + SpvLinkageTypeMax = 0x7fffffff, +} SpvLinkageType; + +typedef enum SpvAccessQualifier_ { + SpvAccessQualifierReadOnly = 0, + SpvAccessQualifierWriteOnly = 1, + SpvAccessQualifierReadWrite = 2, + SpvAccessQualifierMax = 0x7fffffff, +} SpvAccessQualifier; + +typedef enum SpvFunctionParameterAttribute_ { + SpvFunctionParameterAttributeZext = 0, + SpvFunctionParameterAttributeSext = 1, + SpvFunctionParameterAttributeByVal = 2, + SpvFunctionParameterAttributeSret = 
3, + SpvFunctionParameterAttributeNoAlias = 4, + SpvFunctionParameterAttributeNoCapture = 5, + SpvFunctionParameterAttributeNoWrite = 6, + SpvFunctionParameterAttributeNoReadWrite = 7, + SpvFunctionParameterAttributeMax = 0x7fffffff, +} SpvFunctionParameterAttribute; + +typedef enum SpvDecoration_ { + SpvDecorationRelaxedPrecision = 0, + SpvDecorationSpecId = 1, + SpvDecorationBlock = 2, + SpvDecorationBufferBlock = 3, + SpvDecorationRowMajor = 4, + SpvDecorationColMajor = 5, + SpvDecorationArrayStride = 6, + SpvDecorationMatrixStride = 7, + SpvDecorationGLSLShared = 8, + SpvDecorationGLSLPacked = 9, + SpvDecorationCPacked = 10, + SpvDecorationBuiltIn = 11, + SpvDecorationNoPerspective = 13, + SpvDecorationFlat = 14, + SpvDecorationPatch = 15, + SpvDecorationCentroid = 16, + SpvDecorationSample = 17, + SpvDecorationInvariant = 18, + SpvDecorationRestrict = 19, + SpvDecorationAliased = 20, + SpvDecorationVolatile = 21, + SpvDecorationConstant = 22, + SpvDecorationCoherent = 23, + SpvDecorationNonWritable = 24, + SpvDecorationNonReadable = 25, + SpvDecorationUniform = 26, + SpvDecorationUniformId = 27, + SpvDecorationSaturatedConversion = 28, + SpvDecorationStream = 29, + SpvDecorationLocation = 30, + SpvDecorationComponent = 31, + SpvDecorationIndex = 32, + SpvDecorationBinding = 33, + SpvDecorationDescriptorSet = 34, + SpvDecorationOffset = 35, + SpvDecorationXfbBuffer = 36, + SpvDecorationXfbStride = 37, + SpvDecorationFuncParamAttr = 38, + SpvDecorationFPRoundingMode = 39, + SpvDecorationFPFastMathMode = 40, + SpvDecorationLinkageAttributes = 41, + SpvDecorationNoContraction = 42, + SpvDecorationInputAttachmentIndex = 43, + SpvDecorationAlignment = 44, + SpvDecorationMaxByteOffset = 45, + SpvDecorationAlignmentId = 46, + SpvDecorationMaxByteOffsetId = 47, + SpvDecorationNoSignedWrap = 4469, + SpvDecorationNoUnsignedWrap = 4470, + SpvDecorationExplicitInterpAMD = 4999, + SpvDecorationOverrideCoverageNV = 5248, + SpvDecorationPassthroughNV = 5250, + SpvDecorationViewportRelativeNV = 5252, + SpvDecorationSecondaryViewportRelativeNV = 5256, + SpvDecorationPerPrimitiveEXT = 5271, + SpvDecorationPerPrimitiveNV = 5271, + SpvDecorationPerViewNV = 5272, + SpvDecorationPerTaskNV = 5273, + SpvDecorationPerVertexKHR = 5285, + SpvDecorationPerVertexNV = 5285, + SpvDecorationNonUniform = 5300, + SpvDecorationNonUniformEXT = 5300, + SpvDecorationRestrictPointer = 5355, + SpvDecorationRestrictPointerEXT = 5355, + SpvDecorationAliasedPointer = 5356, + SpvDecorationAliasedPointerEXT = 5356, + SpvDecorationBindlessSamplerNV = 5398, + SpvDecorationBindlessImageNV = 5399, + SpvDecorationBoundSamplerNV = 5400, + SpvDecorationBoundImageNV = 5401, + SpvDecorationSIMTCallINTEL = 5599, + SpvDecorationReferencedIndirectlyINTEL = 5602, + SpvDecorationClobberINTEL = 5607, + SpvDecorationSideEffectsINTEL = 5608, + SpvDecorationVectorComputeVariableINTEL = 5624, + SpvDecorationFuncParamIOKindINTEL = 5625, + SpvDecorationVectorComputeFunctionINTEL = 5626, + SpvDecorationStackCallINTEL = 5627, + SpvDecorationGlobalVariableOffsetINTEL = 5628, + SpvDecorationCounterBuffer = 5634, + SpvDecorationHlslCounterBufferGOOGLE = 5634, + SpvDecorationHlslSemanticGOOGLE = 5635, + SpvDecorationUserSemantic = 5635, + SpvDecorationUserTypeGOOGLE = 5636, + SpvDecorationFunctionRoundingModeINTEL = 5822, + SpvDecorationFunctionDenormModeINTEL = 5823, + SpvDecorationRegisterINTEL = 5825, + SpvDecorationMemoryINTEL = 5826, + SpvDecorationNumbanksINTEL = 5827, + SpvDecorationBankwidthINTEL = 5828, + SpvDecorationMaxPrivateCopiesINTEL = 5829, 
+ SpvDecorationSinglepumpINTEL = 5830, + SpvDecorationDoublepumpINTEL = 5831, + SpvDecorationMaxReplicatesINTEL = 5832, + SpvDecorationSimpleDualPortINTEL = 5833, + SpvDecorationMergeINTEL = 5834, + SpvDecorationBankBitsINTEL = 5835, + SpvDecorationForcePow2DepthINTEL = 5836, + SpvDecorationBurstCoalesceINTEL = 5899, + SpvDecorationCacheSizeINTEL = 5900, + SpvDecorationDontStaticallyCoalesceINTEL = 5901, + SpvDecorationPrefetchINTEL = 5902, + SpvDecorationStallEnableINTEL = 5905, + SpvDecorationFuseLoopsInFunctionINTEL = 5907, + SpvDecorationAliasScopeINTEL = 5914, + SpvDecorationNoAliasINTEL = 5915, + SpvDecorationBufferLocationINTEL = 5921, + SpvDecorationIOPipeStorageINTEL = 5944, + SpvDecorationFunctionFloatingPointModeINTEL = 6080, + SpvDecorationSingleElementVectorINTEL = 6085, + SpvDecorationVectorComputeCallableFunctionINTEL = 6087, + SpvDecorationMediaBlockIOINTEL = 6140, + SpvDecorationMax = 0x7fffffff, +} SpvDecoration; + +typedef enum SpvBuiltIn_ { + SpvBuiltInPosition = 0, + SpvBuiltInPointSize = 1, + SpvBuiltInClipDistance = 3, + SpvBuiltInCullDistance = 4, + SpvBuiltInVertexId = 5, + SpvBuiltInInstanceId = 6, + SpvBuiltInPrimitiveId = 7, + SpvBuiltInInvocationId = 8, + SpvBuiltInLayer = 9, + SpvBuiltInViewportIndex = 10, + SpvBuiltInTessLevelOuter = 11, + SpvBuiltInTessLevelInner = 12, + SpvBuiltInTessCoord = 13, + SpvBuiltInPatchVertices = 14, + SpvBuiltInFragCoord = 15, + SpvBuiltInPointCoord = 16, + SpvBuiltInFrontFacing = 17, + SpvBuiltInSampleId = 18, + SpvBuiltInSamplePosition = 19, + SpvBuiltInSampleMask = 20, + SpvBuiltInFragDepth = 22, + SpvBuiltInHelperInvocation = 23, + SpvBuiltInNumWorkgroups = 24, + SpvBuiltInWorkgroupSize = 25, + SpvBuiltInWorkgroupId = 26, + SpvBuiltInLocalInvocationId = 27, + SpvBuiltInGlobalInvocationId = 28, + SpvBuiltInLocalInvocationIndex = 29, + SpvBuiltInWorkDim = 30, + SpvBuiltInGlobalSize = 31, + SpvBuiltInEnqueuedWorkgroupSize = 32, + SpvBuiltInGlobalOffset = 33, + SpvBuiltInGlobalLinearId = 34, + SpvBuiltInSubgroupSize = 36, + SpvBuiltInSubgroupMaxSize = 37, + SpvBuiltInNumSubgroups = 38, + SpvBuiltInNumEnqueuedSubgroups = 39, + SpvBuiltInSubgroupId = 40, + SpvBuiltInSubgroupLocalInvocationId = 41, + SpvBuiltInVertexIndex = 42, + SpvBuiltInInstanceIndex = 43, + SpvBuiltInCoreIDARM = 4160, + SpvBuiltInCoreCountARM = 4161, + SpvBuiltInCoreMaxIDARM = 4162, + SpvBuiltInWarpIDARM = 4163, + SpvBuiltInWarpMaxIDARM = 4164, + SpvBuiltInSubgroupEqMask = 4416, + SpvBuiltInSubgroupEqMaskKHR = 4416, + SpvBuiltInSubgroupGeMask = 4417, + SpvBuiltInSubgroupGeMaskKHR = 4417, + SpvBuiltInSubgroupGtMask = 4418, + SpvBuiltInSubgroupGtMaskKHR = 4418, + SpvBuiltInSubgroupLeMask = 4419, + SpvBuiltInSubgroupLeMaskKHR = 4419, + SpvBuiltInSubgroupLtMask = 4420, + SpvBuiltInSubgroupLtMaskKHR = 4420, + SpvBuiltInBaseVertex = 4424, + SpvBuiltInBaseInstance = 4425, + SpvBuiltInDrawIndex = 4426, + SpvBuiltInPrimitiveShadingRateKHR = 4432, + SpvBuiltInDeviceIndex = 4438, + SpvBuiltInViewIndex = 4440, + SpvBuiltInShadingRateKHR = 4444, + SpvBuiltInBaryCoordNoPerspAMD = 4992, + SpvBuiltInBaryCoordNoPerspCentroidAMD = 4993, + SpvBuiltInBaryCoordNoPerspSampleAMD = 4994, + SpvBuiltInBaryCoordSmoothAMD = 4995, + SpvBuiltInBaryCoordSmoothCentroidAMD = 4996, + SpvBuiltInBaryCoordSmoothSampleAMD = 4997, + SpvBuiltInBaryCoordPullModelAMD = 4998, + SpvBuiltInFragStencilRefEXT = 5014, + SpvBuiltInViewportMaskNV = 5253, + SpvBuiltInSecondaryPositionNV = 5257, + SpvBuiltInSecondaryViewportMaskNV = 5258, + SpvBuiltInPositionPerViewNV = 5261, + SpvBuiltInViewportMaskPerViewNV = 
5262, + SpvBuiltInFullyCoveredEXT = 5264, + SpvBuiltInTaskCountNV = 5274, + SpvBuiltInPrimitiveCountNV = 5275, + SpvBuiltInPrimitiveIndicesNV = 5276, + SpvBuiltInClipDistancePerViewNV = 5277, + SpvBuiltInCullDistancePerViewNV = 5278, + SpvBuiltInLayerPerViewNV = 5279, + SpvBuiltInMeshViewCountNV = 5280, + SpvBuiltInMeshViewIndicesNV = 5281, + SpvBuiltInBaryCoordKHR = 5286, + SpvBuiltInBaryCoordNV = 5286, + SpvBuiltInBaryCoordNoPerspKHR = 5287, + SpvBuiltInBaryCoordNoPerspNV = 5287, + SpvBuiltInFragSizeEXT = 5292, + SpvBuiltInFragmentSizeNV = 5292, + SpvBuiltInFragInvocationCountEXT = 5293, + SpvBuiltInInvocationsPerPixelNV = 5293, + SpvBuiltInPrimitivePointIndicesEXT = 5294, + SpvBuiltInPrimitiveLineIndicesEXT = 5295, + SpvBuiltInPrimitiveTriangleIndicesEXT = 5296, + SpvBuiltInCullPrimitiveEXT = 5299, + SpvBuiltInLaunchIdKHR = 5319, + SpvBuiltInLaunchIdNV = 5319, + SpvBuiltInLaunchSizeKHR = 5320, + SpvBuiltInLaunchSizeNV = 5320, + SpvBuiltInWorldRayOriginKHR = 5321, + SpvBuiltInWorldRayOriginNV = 5321, + SpvBuiltInWorldRayDirectionKHR = 5322, + SpvBuiltInWorldRayDirectionNV = 5322, + SpvBuiltInObjectRayOriginKHR = 5323, + SpvBuiltInObjectRayOriginNV = 5323, + SpvBuiltInObjectRayDirectionKHR = 5324, + SpvBuiltInObjectRayDirectionNV = 5324, + SpvBuiltInRayTminKHR = 5325, + SpvBuiltInRayTminNV = 5325, + SpvBuiltInRayTmaxKHR = 5326, + SpvBuiltInRayTmaxNV = 5326, + SpvBuiltInInstanceCustomIndexKHR = 5327, + SpvBuiltInInstanceCustomIndexNV = 5327, + SpvBuiltInObjectToWorldKHR = 5330, + SpvBuiltInObjectToWorldNV = 5330, + SpvBuiltInWorldToObjectKHR = 5331, + SpvBuiltInWorldToObjectNV = 5331, + SpvBuiltInHitTNV = 5332, + SpvBuiltInHitKindKHR = 5333, + SpvBuiltInHitKindNV = 5333, + SpvBuiltInCurrentRayTimeNV = 5334, + SpvBuiltInIncomingRayFlagsKHR = 5351, + SpvBuiltInIncomingRayFlagsNV = 5351, + SpvBuiltInRayGeometryIndexKHR = 5352, + SpvBuiltInWarpsPerSMNV = 5374, + SpvBuiltInSMCountNV = 5375, + SpvBuiltInWarpIDNV = 5376, + SpvBuiltInSMIDNV = 5377, + SpvBuiltInCullMaskKHR = 6021, + SpvBuiltInMax = 0x7fffffff, +} SpvBuiltIn; + +typedef enum SpvSelectionControlShift_ { + SpvSelectionControlFlattenShift = 0, + SpvSelectionControlDontFlattenShift = 1, + SpvSelectionControlMax = 0x7fffffff, +} SpvSelectionControlShift; + +typedef enum SpvSelectionControlMask_ { + SpvSelectionControlMaskNone = 0, + SpvSelectionControlFlattenMask = 0x00000001, + SpvSelectionControlDontFlattenMask = 0x00000002, +} SpvSelectionControlMask; + +typedef enum SpvLoopControlShift_ { + SpvLoopControlUnrollShift = 0, + SpvLoopControlDontUnrollShift = 1, + SpvLoopControlDependencyInfiniteShift = 2, + SpvLoopControlDependencyLengthShift = 3, + SpvLoopControlMinIterationsShift = 4, + SpvLoopControlMaxIterationsShift = 5, + SpvLoopControlIterationMultipleShift = 6, + SpvLoopControlPeelCountShift = 7, + SpvLoopControlPartialCountShift = 8, + SpvLoopControlInitiationIntervalINTELShift = 16, + SpvLoopControlMaxConcurrencyINTELShift = 17, + SpvLoopControlDependencyArrayINTELShift = 18, + SpvLoopControlPipelineEnableINTELShift = 19, + SpvLoopControlLoopCoalesceINTELShift = 20, + SpvLoopControlMaxInterleavingINTELShift = 21, + SpvLoopControlSpeculatedIterationsINTELShift = 22, + SpvLoopControlNoFusionINTELShift = 23, + SpvLoopControlMax = 0x7fffffff, +} SpvLoopControlShift; + +typedef enum SpvLoopControlMask_ { + SpvLoopControlMaskNone = 0, + SpvLoopControlUnrollMask = 0x00000001, + SpvLoopControlDontUnrollMask = 0x00000002, + SpvLoopControlDependencyInfiniteMask = 0x00000004, + SpvLoopControlDependencyLengthMask = 0x00000008, + 
SpvLoopControlMinIterationsMask = 0x00000010, + SpvLoopControlMaxIterationsMask = 0x00000020, + SpvLoopControlIterationMultipleMask = 0x00000040, + SpvLoopControlPeelCountMask = 0x00000080, + SpvLoopControlPartialCountMask = 0x00000100, + SpvLoopControlInitiationIntervalINTELMask = 0x00010000, + SpvLoopControlMaxConcurrencyINTELMask = 0x00020000, + SpvLoopControlDependencyArrayINTELMask = 0x00040000, + SpvLoopControlPipelineEnableINTELMask = 0x00080000, + SpvLoopControlLoopCoalesceINTELMask = 0x00100000, + SpvLoopControlMaxInterleavingINTELMask = 0x00200000, + SpvLoopControlSpeculatedIterationsINTELMask = 0x00400000, + SpvLoopControlNoFusionINTELMask = 0x00800000, +} SpvLoopControlMask; + +typedef enum SpvFunctionControlShift_ { + SpvFunctionControlInlineShift = 0, + SpvFunctionControlDontInlineShift = 1, + SpvFunctionControlPureShift = 2, + SpvFunctionControlConstShift = 3, + SpvFunctionControlOptNoneINTELShift = 16, + SpvFunctionControlMax = 0x7fffffff, +} SpvFunctionControlShift; + +typedef enum SpvFunctionControlMask_ { + SpvFunctionControlMaskNone = 0, + SpvFunctionControlInlineMask = 0x00000001, + SpvFunctionControlDontInlineMask = 0x00000002, + SpvFunctionControlPureMask = 0x00000004, + SpvFunctionControlConstMask = 0x00000008, + SpvFunctionControlOptNoneINTELMask = 0x00010000, +} SpvFunctionControlMask; + +typedef enum SpvMemorySemanticsShift_ { + SpvMemorySemanticsAcquireShift = 1, + SpvMemorySemanticsReleaseShift = 2, + SpvMemorySemanticsAcquireReleaseShift = 3, + SpvMemorySemanticsSequentiallyConsistentShift = 4, + SpvMemorySemanticsUniformMemoryShift = 6, + SpvMemorySemanticsSubgroupMemoryShift = 7, + SpvMemorySemanticsWorkgroupMemoryShift = 8, + SpvMemorySemanticsCrossWorkgroupMemoryShift = 9, + SpvMemorySemanticsAtomicCounterMemoryShift = 10, + SpvMemorySemanticsImageMemoryShift = 11, + SpvMemorySemanticsOutputMemoryShift = 12, + SpvMemorySemanticsOutputMemoryKHRShift = 12, + SpvMemorySemanticsMakeAvailableShift = 13, + SpvMemorySemanticsMakeAvailableKHRShift = 13, + SpvMemorySemanticsMakeVisibleShift = 14, + SpvMemorySemanticsMakeVisibleKHRShift = 14, + SpvMemorySemanticsVolatileShift = 15, + SpvMemorySemanticsMax = 0x7fffffff, +} SpvMemorySemanticsShift; + +typedef enum SpvMemorySemanticsMask_ { + SpvMemorySemanticsMaskNone = 0, + SpvMemorySemanticsAcquireMask = 0x00000002, + SpvMemorySemanticsReleaseMask = 0x00000004, + SpvMemorySemanticsAcquireReleaseMask = 0x00000008, + SpvMemorySemanticsSequentiallyConsistentMask = 0x00000010, + SpvMemorySemanticsUniformMemoryMask = 0x00000040, + SpvMemorySemanticsSubgroupMemoryMask = 0x00000080, + SpvMemorySemanticsWorkgroupMemoryMask = 0x00000100, + SpvMemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, + SpvMemorySemanticsAtomicCounterMemoryMask = 0x00000400, + SpvMemorySemanticsImageMemoryMask = 0x00000800, + SpvMemorySemanticsOutputMemoryMask = 0x00001000, + SpvMemorySemanticsOutputMemoryKHRMask = 0x00001000, + SpvMemorySemanticsMakeAvailableMask = 0x00002000, + SpvMemorySemanticsMakeAvailableKHRMask = 0x00002000, + SpvMemorySemanticsMakeVisibleMask = 0x00004000, + SpvMemorySemanticsMakeVisibleKHRMask = 0x00004000, + SpvMemorySemanticsVolatileMask = 0x00008000, +} SpvMemorySemanticsMask; + +typedef enum SpvMemoryAccessShift_ { + SpvMemoryAccessVolatileShift = 0, + SpvMemoryAccessAlignedShift = 1, + SpvMemoryAccessNontemporalShift = 2, + SpvMemoryAccessMakePointerAvailableShift = 3, + SpvMemoryAccessMakePointerAvailableKHRShift = 3, + SpvMemoryAccessMakePointerVisibleShift = 4, + SpvMemoryAccessMakePointerVisibleKHRShift = 4, + 
SpvMemoryAccessNonPrivatePointerShift = 5, + SpvMemoryAccessNonPrivatePointerKHRShift = 5, + SpvMemoryAccessAliasScopeINTELMaskShift = 16, + SpvMemoryAccessNoAliasINTELMaskShift = 17, + SpvMemoryAccessMax = 0x7fffffff, +} SpvMemoryAccessShift; + +typedef enum SpvMemoryAccessMask_ { + SpvMemoryAccessMaskNone = 0, + SpvMemoryAccessVolatileMask = 0x00000001, + SpvMemoryAccessAlignedMask = 0x00000002, + SpvMemoryAccessNontemporalMask = 0x00000004, + SpvMemoryAccessMakePointerAvailableMask = 0x00000008, + SpvMemoryAccessMakePointerAvailableKHRMask = 0x00000008, + SpvMemoryAccessMakePointerVisibleMask = 0x00000010, + SpvMemoryAccessMakePointerVisibleKHRMask = 0x00000010, + SpvMemoryAccessNonPrivatePointerMask = 0x00000020, + SpvMemoryAccessNonPrivatePointerKHRMask = 0x00000020, + SpvMemoryAccessAliasScopeINTELMaskMask = 0x00010000, + SpvMemoryAccessNoAliasINTELMaskMask = 0x00020000, +} SpvMemoryAccessMask; + +typedef enum SpvScope_ { + SpvScopeCrossDevice = 0, + SpvScopeDevice = 1, + SpvScopeWorkgroup = 2, + SpvScopeSubgroup = 3, + SpvScopeInvocation = 4, + SpvScopeQueueFamily = 5, + SpvScopeQueueFamilyKHR = 5, + SpvScopeShaderCallKHR = 6, + SpvScopeMax = 0x7fffffff, +} SpvScope; + +typedef enum SpvGroupOperation_ { + SpvGroupOperationReduce = 0, + SpvGroupOperationInclusiveScan = 1, + SpvGroupOperationExclusiveScan = 2, + SpvGroupOperationClusteredReduce = 3, + SpvGroupOperationPartitionedReduceNV = 6, + SpvGroupOperationPartitionedInclusiveScanNV = 7, + SpvGroupOperationPartitionedExclusiveScanNV = 8, + SpvGroupOperationMax = 0x7fffffff, +} SpvGroupOperation; + +typedef enum SpvKernelEnqueueFlags_ { + SpvKernelEnqueueFlagsNoWait = 0, + SpvKernelEnqueueFlagsWaitKernel = 1, + SpvKernelEnqueueFlagsWaitWorkGroup = 2, + SpvKernelEnqueueFlagsMax = 0x7fffffff, +} SpvKernelEnqueueFlags; + +typedef enum SpvKernelProfilingInfoShift_ { + SpvKernelProfilingInfoCmdExecTimeShift = 0, + SpvKernelProfilingInfoMax = 0x7fffffff, +} SpvKernelProfilingInfoShift; + +typedef enum SpvKernelProfilingInfoMask_ { + SpvKernelProfilingInfoMaskNone = 0, + SpvKernelProfilingInfoCmdExecTimeMask = 0x00000001, +} SpvKernelProfilingInfoMask; + +typedef enum SpvCapability_ { + SpvCapabilityMatrix = 0, + SpvCapabilityShader = 1, + SpvCapabilityGeometry = 2, + SpvCapabilityTessellation = 3, + SpvCapabilityAddresses = 4, + SpvCapabilityLinkage = 5, + SpvCapabilityKernel = 6, + SpvCapabilityVector16 = 7, + SpvCapabilityFloat16Buffer = 8, + SpvCapabilityFloat16 = 9, + SpvCapabilityFloat64 = 10, + SpvCapabilityInt64 = 11, + SpvCapabilityInt64Atomics = 12, + SpvCapabilityImageBasic = 13, + SpvCapabilityImageReadWrite = 14, + SpvCapabilityImageMipmap = 15, + SpvCapabilityPipes = 17, + SpvCapabilityGroups = 18, + SpvCapabilityDeviceEnqueue = 19, + SpvCapabilityLiteralSampler = 20, + SpvCapabilityAtomicStorage = 21, + SpvCapabilityInt16 = 22, + SpvCapabilityTessellationPointSize = 23, + SpvCapabilityGeometryPointSize = 24, + SpvCapabilityImageGatherExtended = 25, + SpvCapabilityStorageImageMultisample = 27, + SpvCapabilityUniformBufferArrayDynamicIndexing = 28, + SpvCapabilitySampledImageArrayDynamicIndexing = 29, + SpvCapabilityStorageBufferArrayDynamicIndexing = 30, + SpvCapabilityStorageImageArrayDynamicIndexing = 31, + SpvCapabilityClipDistance = 32, + SpvCapabilityCullDistance = 33, + SpvCapabilityImageCubeArray = 34, + SpvCapabilitySampleRateShading = 35, + SpvCapabilityImageRect = 36, + SpvCapabilitySampledRect = 37, + SpvCapabilityGenericPointer = 38, + SpvCapabilityInt8 = 39, + SpvCapabilityInputAttachment = 40, + 
SpvCapabilitySparseResidency = 41, + SpvCapabilityMinLod = 42, + SpvCapabilitySampled1D = 43, + SpvCapabilityImage1D = 44, + SpvCapabilitySampledCubeArray = 45, + SpvCapabilitySampledBuffer = 46, + SpvCapabilityImageBuffer = 47, + SpvCapabilityImageMSArray = 48, + SpvCapabilityStorageImageExtendedFormats = 49, + SpvCapabilityImageQuery = 50, + SpvCapabilityDerivativeControl = 51, + SpvCapabilityInterpolationFunction = 52, + SpvCapabilityTransformFeedback = 53, + SpvCapabilityGeometryStreams = 54, + SpvCapabilityStorageImageReadWithoutFormat = 55, + SpvCapabilityStorageImageWriteWithoutFormat = 56, + SpvCapabilityMultiViewport = 57, + SpvCapabilitySubgroupDispatch = 58, + SpvCapabilityNamedBarrier = 59, + SpvCapabilityPipeStorage = 60, + SpvCapabilityGroupNonUniform = 61, + SpvCapabilityGroupNonUniformVote = 62, + SpvCapabilityGroupNonUniformArithmetic = 63, + SpvCapabilityGroupNonUniformBallot = 64, + SpvCapabilityGroupNonUniformShuffle = 65, + SpvCapabilityGroupNonUniformShuffleRelative = 66, + SpvCapabilityGroupNonUniformClustered = 67, + SpvCapabilityGroupNonUniformQuad = 68, + SpvCapabilityShaderLayer = 69, + SpvCapabilityShaderViewportIndex = 70, + SpvCapabilityUniformDecoration = 71, + SpvCapabilityCoreBuiltinsARM = 4165, + SpvCapabilityFragmentShadingRateKHR = 4422, + SpvCapabilitySubgroupBallotKHR = 4423, + SpvCapabilityDrawParameters = 4427, + SpvCapabilityWorkgroupMemoryExplicitLayoutKHR = 4428, + SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR = 4429, + SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR = 4430, + SpvCapabilitySubgroupVoteKHR = 4431, + SpvCapabilityStorageBuffer16BitAccess = 4433, + SpvCapabilityStorageUniformBufferBlock16 = 4433, + SpvCapabilityStorageUniform16 = 4434, + SpvCapabilityUniformAndStorageBuffer16BitAccess = 4434, + SpvCapabilityStoragePushConstant16 = 4435, + SpvCapabilityStorageInputOutput16 = 4436, + SpvCapabilityDeviceGroup = 4437, + SpvCapabilityMultiView = 4439, + SpvCapabilityVariablePointersStorageBuffer = 4441, + SpvCapabilityVariablePointers = 4442, + SpvCapabilityAtomicStorageOps = 4445, + SpvCapabilitySampleMaskPostDepthCoverage = 4447, + SpvCapabilityStorageBuffer8BitAccess = 4448, + SpvCapabilityUniformAndStorageBuffer8BitAccess = 4449, + SpvCapabilityStoragePushConstant8 = 4450, + SpvCapabilityDenormPreserve = 4464, + SpvCapabilityDenormFlushToZero = 4465, + SpvCapabilitySignedZeroInfNanPreserve = 4466, + SpvCapabilityRoundingModeRTE = 4467, + SpvCapabilityRoundingModeRTZ = 4468, + SpvCapabilityRayQueryProvisionalKHR = 4471, + SpvCapabilityRayQueryKHR = 4472, + SpvCapabilityRayTraversalPrimitiveCullingKHR = 4478, + SpvCapabilityRayTracingKHR = 4479, + SpvCapabilityFloat16ImageAMD = 5008, + SpvCapabilityImageGatherBiasLodAMD = 5009, + SpvCapabilityFragmentMaskAMD = 5010, + SpvCapabilityStencilExportEXT = 5013, + SpvCapabilityImageReadWriteLodAMD = 5015, + SpvCapabilityInt64ImageEXT = 5016, + SpvCapabilityShaderClockKHR = 5055, + SpvCapabilitySampleMaskOverrideCoverageNV = 5249, + SpvCapabilityGeometryShaderPassthroughNV = 5251, + SpvCapabilityShaderViewportIndexLayerEXT = 5254, + SpvCapabilityShaderViewportIndexLayerNV = 5254, + SpvCapabilityShaderViewportMaskNV = 5255, + SpvCapabilityShaderStereoViewNV = 5259, + SpvCapabilityPerViewAttributesNV = 5260, + SpvCapabilityFragmentFullyCoveredEXT = 5265, + SpvCapabilityMeshShadingNV = 5266, + SpvCapabilityImageFootprintNV = 5282, + SpvCapabilityMeshShadingEXT = 5283, + SpvCapabilityFragmentBarycentricKHR = 5284, + SpvCapabilityFragmentBarycentricNV = 5284, + 
SpvCapabilityComputeDerivativeGroupQuadsNV = 5288, + SpvCapabilityFragmentDensityEXT = 5291, + SpvCapabilityShadingRateNV = 5291, + SpvCapabilityGroupNonUniformPartitionedNV = 5297, + SpvCapabilityShaderNonUniform = 5301, + SpvCapabilityShaderNonUniformEXT = 5301, + SpvCapabilityRuntimeDescriptorArray = 5302, + SpvCapabilityRuntimeDescriptorArrayEXT = 5302, + SpvCapabilityInputAttachmentArrayDynamicIndexing = 5303, + SpvCapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + SpvCapabilityUniformTexelBufferArrayDynamicIndexing = 5304, + SpvCapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + SpvCapabilityStorageTexelBufferArrayDynamicIndexing = 5305, + SpvCapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + SpvCapabilityUniformBufferArrayNonUniformIndexing = 5306, + SpvCapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + SpvCapabilitySampledImageArrayNonUniformIndexing = 5307, + SpvCapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + SpvCapabilityStorageBufferArrayNonUniformIndexing = 5308, + SpvCapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + SpvCapabilityStorageImageArrayNonUniformIndexing = 5309, + SpvCapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + SpvCapabilityInputAttachmentArrayNonUniformIndexing = 5310, + SpvCapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, + SpvCapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, + SpvCapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + SpvCapabilityRayTracingNV = 5340, + SpvCapabilityRayTracingMotionBlurNV = 5341, + SpvCapabilityVulkanMemoryModel = 5345, + SpvCapabilityVulkanMemoryModelKHR = 5345, + SpvCapabilityVulkanMemoryModelDeviceScope = 5346, + SpvCapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + SpvCapabilityPhysicalStorageBufferAddresses = 5347, + SpvCapabilityPhysicalStorageBufferAddressesEXT = 5347, + SpvCapabilityComputeDerivativeGroupLinearNV = 5350, + SpvCapabilityRayTracingProvisionalKHR = 5353, + SpvCapabilityCooperativeMatrixNV = 5357, + SpvCapabilityFragmentShaderSampleInterlockEXT = 5363, + SpvCapabilityFragmentShaderShadingRateInterlockEXT = 5372, + SpvCapabilityShaderSMBuiltinsNV = 5373, + SpvCapabilityFragmentShaderPixelInterlockEXT = 5378, + SpvCapabilityDemoteToHelperInvocation = 5379, + SpvCapabilityDemoteToHelperInvocationEXT = 5379, + SpvCapabilityRayTracingOpacityMicromapEXT = 5381, + SpvCapabilityBindlessTextureNV = 5390, + SpvCapabilitySubgroupShuffleINTEL = 5568, + SpvCapabilitySubgroupBufferBlockIOINTEL = 5569, + SpvCapabilitySubgroupImageBlockIOINTEL = 5570, + SpvCapabilitySubgroupImageMediaBlockIOINTEL = 5579, + SpvCapabilityRoundToInfinityINTEL = 5582, + SpvCapabilityFloatingPointModeINTEL = 5583, + SpvCapabilityIntegerFunctions2INTEL = 5584, + SpvCapabilityFunctionPointersINTEL = 5603, + SpvCapabilityIndirectReferencesINTEL = 5604, + SpvCapabilityAsmINTEL = 5606, + SpvCapabilityAtomicFloat32MinMaxEXT = 5612, + SpvCapabilityAtomicFloat64MinMaxEXT = 5613, + SpvCapabilityAtomicFloat16MinMaxEXT = 5616, + SpvCapabilityVectorComputeINTEL = 5617, + SpvCapabilityVectorAnyINTEL = 5619, + SpvCapabilityExpectAssumeKHR = 5629, + SpvCapabilitySubgroupAvcMotionEstimationINTEL = 5696, + SpvCapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + SpvCapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, + SpvCapabilityVariableLengthArrayINTEL = 5817, + SpvCapabilityFunctionFloatControlINTEL = 5821, + 
SpvCapabilityFPGAMemoryAttributesINTEL = 5824, + SpvCapabilityFPFastMathModeINTEL = 5837, + SpvCapabilityArbitraryPrecisionIntegersINTEL = 5844, + SpvCapabilityArbitraryPrecisionFloatingPointINTEL = 5845, + SpvCapabilityUnstructuredLoopControlsINTEL = 5886, + SpvCapabilityFPGALoopControlsINTEL = 5888, + SpvCapabilityKernelAttributesINTEL = 5892, + SpvCapabilityFPGAKernelAttributesINTEL = 5897, + SpvCapabilityFPGAMemoryAccessesINTEL = 5898, + SpvCapabilityFPGAClusterAttributesINTEL = 5904, + SpvCapabilityLoopFuseINTEL = 5906, + SpvCapabilityMemoryAccessAliasingINTEL = 5910, + SpvCapabilityFPGABufferLocationINTEL = 5920, + SpvCapabilityArbitraryPrecisionFixedPointINTEL = 5922, + SpvCapabilityUSMStorageClassesINTEL = 5935, + SpvCapabilityIOPipesINTEL = 5943, + SpvCapabilityBlockingPipesINTEL = 5945, + SpvCapabilityFPGARegINTEL = 5948, + SpvCapabilityDotProductInputAll = 6016, + SpvCapabilityDotProductInputAllKHR = 6016, + SpvCapabilityDotProductInput4x8Bit = 6017, + SpvCapabilityDotProductInput4x8BitKHR = 6017, + SpvCapabilityDotProductInput4x8BitPacked = 6018, + SpvCapabilityDotProductInput4x8BitPackedKHR = 6018, + SpvCapabilityDotProduct = 6019, + SpvCapabilityDotProductKHR = 6019, + SpvCapabilityRayCullMaskKHR = 6020, + SpvCapabilityBitInstructions = 6025, + SpvCapabilityGroupNonUniformRotateKHR = 6026, + SpvCapabilityAtomicFloat32AddEXT = 6033, + SpvCapabilityAtomicFloat64AddEXT = 6034, + SpvCapabilityLongConstantCompositeINTEL = 6089, + SpvCapabilityOptNoneINTEL = 6094, + SpvCapabilityAtomicFloat16AddEXT = 6095, + SpvCapabilityDebugInfoModuleINTEL = 6114, + SpvCapabilitySplitBarrierINTEL = 6141, + SpvCapabilityGroupUniformArithmeticKHR = 6400, + SpvCapabilityMax = 0x7fffffff, +} SpvCapability; + +typedef enum SpvRayFlagsShift_ { + SpvRayFlagsOpaqueKHRShift = 0, + SpvRayFlagsNoOpaqueKHRShift = 1, + SpvRayFlagsTerminateOnFirstHitKHRShift = 2, + SpvRayFlagsSkipClosestHitShaderKHRShift = 3, + SpvRayFlagsCullBackFacingTrianglesKHRShift = 4, + SpvRayFlagsCullFrontFacingTrianglesKHRShift = 5, + SpvRayFlagsCullOpaqueKHRShift = 6, + SpvRayFlagsCullNoOpaqueKHRShift = 7, + SpvRayFlagsSkipTrianglesKHRShift = 8, + SpvRayFlagsSkipAABBsKHRShift = 9, + SpvRayFlagsForceOpacityMicromap2StateEXTShift = 10, + SpvRayFlagsMax = 0x7fffffff, +} SpvRayFlagsShift; + +typedef enum SpvRayFlagsMask_ { + SpvRayFlagsMaskNone = 0, + SpvRayFlagsOpaqueKHRMask = 0x00000001, + SpvRayFlagsNoOpaqueKHRMask = 0x00000002, + SpvRayFlagsTerminateOnFirstHitKHRMask = 0x00000004, + SpvRayFlagsSkipClosestHitShaderKHRMask = 0x00000008, + SpvRayFlagsCullBackFacingTrianglesKHRMask = 0x00000010, + SpvRayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, + SpvRayFlagsCullOpaqueKHRMask = 0x00000040, + SpvRayFlagsCullNoOpaqueKHRMask = 0x00000080, + SpvRayFlagsSkipTrianglesKHRMask = 0x00000100, + SpvRayFlagsSkipAABBsKHRMask = 0x00000200, + SpvRayFlagsForceOpacityMicromap2StateEXTMask = 0x00000400, +} SpvRayFlagsMask; + +typedef enum SpvRayQueryIntersection_ { + SpvRayQueryIntersectionRayQueryCandidateIntersectionKHR = 0, + SpvRayQueryIntersectionRayQueryCommittedIntersectionKHR = 1, + SpvRayQueryIntersectionMax = 0x7fffffff, +} SpvRayQueryIntersection; + +typedef enum SpvRayQueryCommittedIntersectionType_ { + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR = 0, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR = 1, + SpvRayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR = 2, + SpvRayQueryCommittedIntersectionTypeMax = 0x7fffffff, +} 
SpvRayQueryCommittedIntersectionType; + +typedef enum SpvRayQueryCandidateIntersectionType_ { + SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR = 0, + SpvRayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR = 1, + SpvRayQueryCandidateIntersectionTypeMax = 0x7fffffff, +} SpvRayQueryCandidateIntersectionType; + +typedef enum SpvFragmentShadingRateShift_ { + SpvFragmentShadingRateVertical2PixelsShift = 0, + SpvFragmentShadingRateVertical4PixelsShift = 1, + SpvFragmentShadingRateHorizontal2PixelsShift = 2, + SpvFragmentShadingRateHorizontal4PixelsShift = 3, + SpvFragmentShadingRateMax = 0x7fffffff, +} SpvFragmentShadingRateShift; + +typedef enum SpvFragmentShadingRateMask_ { + SpvFragmentShadingRateMaskNone = 0, + SpvFragmentShadingRateVertical2PixelsMask = 0x00000001, + SpvFragmentShadingRateVertical4PixelsMask = 0x00000002, + SpvFragmentShadingRateHorizontal2PixelsMask = 0x00000004, + SpvFragmentShadingRateHorizontal4PixelsMask = 0x00000008, +} SpvFragmentShadingRateMask; + +typedef enum SpvFPDenormMode_ { + SpvFPDenormModePreserve = 0, + SpvFPDenormModeFlushToZero = 1, + SpvFPDenormModeMax = 0x7fffffff, +} SpvFPDenormMode; + +typedef enum SpvFPOperationMode_ { + SpvFPOperationModeIEEE = 0, + SpvFPOperationModeALT = 1, + SpvFPOperationModeMax = 0x7fffffff, +} SpvFPOperationMode; + +typedef enum SpvQuantizationModes_ { + SpvQuantizationModesTRN = 0, + SpvQuantizationModesTRN_ZERO = 1, + SpvQuantizationModesRND = 2, + SpvQuantizationModesRND_ZERO = 3, + SpvQuantizationModesRND_INF = 4, + SpvQuantizationModesRND_MIN_INF = 5, + SpvQuantizationModesRND_CONV = 6, + SpvQuantizationModesRND_CONV_ODD = 7, + SpvQuantizationModesMax = 0x7fffffff, +} SpvQuantizationModes; + +typedef enum SpvOverflowModes_ { + SpvOverflowModesWRAP = 0, + SpvOverflowModesSAT = 1, + SpvOverflowModesSAT_ZERO = 2, + SpvOverflowModesSAT_SYM = 3, + SpvOverflowModesMax = 0x7fffffff, +} SpvOverflowModes; + +typedef enum SpvPackedVectorFormat_ { + SpvPackedVectorFormatPackedVectorFormat4x8Bit = 0, + SpvPackedVectorFormatPackedVectorFormat4x8BitKHR = 0, + SpvPackedVectorFormatMax = 0x7fffffff, +} SpvPackedVectorFormat; + +typedef enum SpvOp_ { + SpvOpNop = 0, + SpvOpUndef = 1, + SpvOpSourceContinued = 2, + SpvOpSource = 3, + SpvOpSourceExtension = 4, + SpvOpName = 5, + SpvOpMemberName = 6, + SpvOpString = 7, + SpvOpLine = 8, + SpvOpExtension = 10, + SpvOpExtInstImport = 11, + SpvOpExtInst = 12, + SpvOpMemoryModel = 14, + SpvOpEntryPoint = 15, + SpvOpExecutionMode = 16, + SpvOpCapability = 17, + SpvOpTypeVoid = 19, + SpvOpTypeBool = 20, + SpvOpTypeInt = 21, + SpvOpTypeFloat = 22, + SpvOpTypeVector = 23, + SpvOpTypeMatrix = 24, + SpvOpTypeImage = 25, + SpvOpTypeSampler = 26, + SpvOpTypeSampledImage = 27, + SpvOpTypeArray = 28, + SpvOpTypeRuntimeArray = 29, + SpvOpTypeStruct = 30, + SpvOpTypeOpaque = 31, + SpvOpTypePointer = 32, + SpvOpTypeFunction = 33, + SpvOpTypeEvent = 34, + SpvOpTypeDeviceEvent = 35, + SpvOpTypeReserveId = 36, + SpvOpTypeQueue = 37, + SpvOpTypePipe = 38, + SpvOpTypeForwardPointer = 39, + SpvOpConstantTrue = 41, + SpvOpConstantFalse = 42, + SpvOpConstant = 43, + SpvOpConstantComposite = 44, + SpvOpConstantSampler = 45, + SpvOpConstantNull = 46, + SpvOpSpecConstantTrue = 48, + SpvOpSpecConstantFalse = 49, + SpvOpSpecConstant = 50, + SpvOpSpecConstantComposite = 51, + SpvOpSpecConstantOp = 52, + SpvOpFunction = 54, + SpvOpFunctionParameter = 55, + SpvOpFunctionEnd = 56, + SpvOpFunctionCall = 57, + SpvOpVariable = 59, + SpvOpImageTexelPointer = 60, + SpvOpLoad = 61, + 
SpvOpStore = 62, + SpvOpCopyMemory = 63, + SpvOpCopyMemorySized = 64, + SpvOpAccessChain = 65, + SpvOpInBoundsAccessChain = 66, + SpvOpPtrAccessChain = 67, + SpvOpArrayLength = 68, + SpvOpGenericPtrMemSemantics = 69, + SpvOpInBoundsPtrAccessChain = 70, + SpvOpDecorate = 71, + SpvOpMemberDecorate = 72, + SpvOpDecorationGroup = 73, + SpvOpGroupDecorate = 74, + SpvOpGroupMemberDecorate = 75, + SpvOpVectorExtractDynamic = 77, + SpvOpVectorInsertDynamic = 78, + SpvOpVectorShuffle = 79, + SpvOpCompositeConstruct = 80, + SpvOpCompositeExtract = 81, + SpvOpCompositeInsert = 82, + SpvOpCopyObject = 83, + SpvOpTranspose = 84, + SpvOpSampledImage = 86, + SpvOpImageSampleImplicitLod = 87, + SpvOpImageSampleExplicitLod = 88, + SpvOpImageSampleDrefImplicitLod = 89, + SpvOpImageSampleDrefExplicitLod = 90, + SpvOpImageSampleProjImplicitLod = 91, + SpvOpImageSampleProjExplicitLod = 92, + SpvOpImageSampleProjDrefImplicitLod = 93, + SpvOpImageSampleProjDrefExplicitLod = 94, + SpvOpImageFetch = 95, + SpvOpImageGather = 96, + SpvOpImageDrefGather = 97, + SpvOpImageRead = 98, + SpvOpImageWrite = 99, + SpvOpImage = 100, + SpvOpImageQueryFormat = 101, + SpvOpImageQueryOrder = 102, + SpvOpImageQuerySizeLod = 103, + SpvOpImageQuerySize = 104, + SpvOpImageQueryLod = 105, + SpvOpImageQueryLevels = 106, + SpvOpImageQuerySamples = 107, + SpvOpConvertFToU = 109, + SpvOpConvertFToS = 110, + SpvOpConvertSToF = 111, + SpvOpConvertUToF = 112, + SpvOpUConvert = 113, + SpvOpSConvert = 114, + SpvOpFConvert = 115, + SpvOpQuantizeToF16 = 116, + SpvOpConvertPtrToU = 117, + SpvOpSatConvertSToU = 118, + SpvOpSatConvertUToS = 119, + SpvOpConvertUToPtr = 120, + SpvOpPtrCastToGeneric = 121, + SpvOpGenericCastToPtr = 122, + SpvOpGenericCastToPtrExplicit = 123, + SpvOpBitcast = 124, + SpvOpSNegate = 126, + SpvOpFNegate = 127, + SpvOpIAdd = 128, + SpvOpFAdd = 129, + SpvOpISub = 130, + SpvOpFSub = 131, + SpvOpIMul = 132, + SpvOpFMul = 133, + SpvOpUDiv = 134, + SpvOpSDiv = 135, + SpvOpFDiv = 136, + SpvOpUMod = 137, + SpvOpSRem = 138, + SpvOpSMod = 139, + SpvOpFRem = 140, + SpvOpFMod = 141, + SpvOpVectorTimesScalar = 142, + SpvOpMatrixTimesScalar = 143, + SpvOpVectorTimesMatrix = 144, + SpvOpMatrixTimesVector = 145, + SpvOpMatrixTimesMatrix = 146, + SpvOpOuterProduct = 147, + SpvOpDot = 148, + SpvOpIAddCarry = 149, + SpvOpISubBorrow = 150, + SpvOpUMulExtended = 151, + SpvOpSMulExtended = 152, + SpvOpAny = 154, + SpvOpAll = 155, + SpvOpIsNan = 156, + SpvOpIsInf = 157, + SpvOpIsFinite = 158, + SpvOpIsNormal = 159, + SpvOpSignBitSet = 160, + SpvOpLessOrGreater = 161, + SpvOpOrdered = 162, + SpvOpUnordered = 163, + SpvOpLogicalEqual = 164, + SpvOpLogicalNotEqual = 165, + SpvOpLogicalOr = 166, + SpvOpLogicalAnd = 167, + SpvOpLogicalNot = 168, + SpvOpSelect = 169, + SpvOpIEqual = 170, + SpvOpINotEqual = 171, + SpvOpUGreaterThan = 172, + SpvOpSGreaterThan = 173, + SpvOpUGreaterThanEqual = 174, + SpvOpSGreaterThanEqual = 175, + SpvOpULessThan = 176, + SpvOpSLessThan = 177, + SpvOpULessThanEqual = 178, + SpvOpSLessThanEqual = 179, + SpvOpFOrdEqual = 180, + SpvOpFUnordEqual = 181, + SpvOpFOrdNotEqual = 182, + SpvOpFUnordNotEqual = 183, + SpvOpFOrdLessThan = 184, + SpvOpFUnordLessThan = 185, + SpvOpFOrdGreaterThan = 186, + SpvOpFUnordGreaterThan = 187, + SpvOpFOrdLessThanEqual = 188, + SpvOpFUnordLessThanEqual = 189, + SpvOpFOrdGreaterThanEqual = 190, + SpvOpFUnordGreaterThanEqual = 191, + SpvOpShiftRightLogical = 194, + SpvOpShiftRightArithmetic = 195, + SpvOpShiftLeftLogical = 196, + SpvOpBitwiseOr = 197, + SpvOpBitwiseXor = 198, + SpvOpBitwiseAnd = 
199, + SpvOpNot = 200, + SpvOpBitFieldInsert = 201, + SpvOpBitFieldSExtract = 202, + SpvOpBitFieldUExtract = 203, + SpvOpBitReverse = 204, + SpvOpBitCount = 205, + SpvOpDPdx = 207, + SpvOpDPdy = 208, + SpvOpFwidth = 209, + SpvOpDPdxFine = 210, + SpvOpDPdyFine = 211, + SpvOpFwidthFine = 212, + SpvOpDPdxCoarse = 213, + SpvOpDPdyCoarse = 214, + SpvOpFwidthCoarse = 215, + SpvOpEmitVertex = 218, + SpvOpEndPrimitive = 219, + SpvOpEmitStreamVertex = 220, + SpvOpEndStreamPrimitive = 221, + SpvOpControlBarrier = 224, + SpvOpMemoryBarrier = 225, + SpvOpAtomicLoad = 227, + SpvOpAtomicStore = 228, + SpvOpAtomicExchange = 229, + SpvOpAtomicCompareExchange = 230, + SpvOpAtomicCompareExchangeWeak = 231, + SpvOpAtomicIIncrement = 232, + SpvOpAtomicIDecrement = 233, + SpvOpAtomicIAdd = 234, + SpvOpAtomicISub = 235, + SpvOpAtomicSMin = 236, + SpvOpAtomicUMin = 237, + SpvOpAtomicSMax = 238, + SpvOpAtomicUMax = 239, + SpvOpAtomicAnd = 240, + SpvOpAtomicOr = 241, + SpvOpAtomicXor = 242, + SpvOpPhi = 245, + SpvOpLoopMerge = 246, + SpvOpSelectionMerge = 247, + SpvOpLabel = 248, + SpvOpBranch = 249, + SpvOpBranchConditional = 250, + SpvOpSwitch = 251, + SpvOpKill = 252, + SpvOpReturn = 253, + SpvOpReturnValue = 254, + SpvOpUnreachable = 255, + SpvOpLifetimeStart = 256, + SpvOpLifetimeStop = 257, + SpvOpGroupAsyncCopy = 259, + SpvOpGroupWaitEvents = 260, + SpvOpGroupAll = 261, + SpvOpGroupAny = 262, + SpvOpGroupBroadcast = 263, + SpvOpGroupIAdd = 264, + SpvOpGroupFAdd = 265, + SpvOpGroupFMin = 266, + SpvOpGroupUMin = 267, + SpvOpGroupSMin = 268, + SpvOpGroupFMax = 269, + SpvOpGroupUMax = 270, + SpvOpGroupSMax = 271, + SpvOpReadPipe = 274, + SpvOpWritePipe = 275, + SpvOpReservedReadPipe = 276, + SpvOpReservedWritePipe = 277, + SpvOpReserveReadPipePackets = 278, + SpvOpReserveWritePipePackets = 279, + SpvOpCommitReadPipe = 280, + SpvOpCommitWritePipe = 281, + SpvOpIsValidReserveId = 282, + SpvOpGetNumPipePackets = 283, + SpvOpGetMaxPipePackets = 284, + SpvOpGroupReserveReadPipePackets = 285, + SpvOpGroupReserveWritePipePackets = 286, + SpvOpGroupCommitReadPipe = 287, + SpvOpGroupCommitWritePipe = 288, + SpvOpEnqueueMarker = 291, + SpvOpEnqueueKernel = 292, + SpvOpGetKernelNDrangeSubGroupCount = 293, + SpvOpGetKernelNDrangeMaxSubGroupSize = 294, + SpvOpGetKernelWorkGroupSize = 295, + SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296, + SpvOpRetainEvent = 297, + SpvOpReleaseEvent = 298, + SpvOpCreateUserEvent = 299, + SpvOpIsValidEvent = 300, + SpvOpSetUserEventStatus = 301, + SpvOpCaptureEventProfilingInfo = 302, + SpvOpGetDefaultQueue = 303, + SpvOpBuildNDRange = 304, + SpvOpImageSparseSampleImplicitLod = 305, + SpvOpImageSparseSampleExplicitLod = 306, + SpvOpImageSparseSampleDrefImplicitLod = 307, + SpvOpImageSparseSampleDrefExplicitLod = 308, + SpvOpImageSparseSampleProjImplicitLod = 309, + SpvOpImageSparseSampleProjExplicitLod = 310, + SpvOpImageSparseSampleProjDrefImplicitLod = 311, + SpvOpImageSparseSampleProjDrefExplicitLod = 312, + SpvOpImageSparseFetch = 313, + SpvOpImageSparseGather = 314, + SpvOpImageSparseDrefGather = 315, + SpvOpImageSparseTexelsResident = 316, + SpvOpNoLine = 317, + SpvOpAtomicFlagTestAndSet = 318, + SpvOpAtomicFlagClear = 319, + SpvOpImageSparseRead = 320, + SpvOpSizeOf = 321, + SpvOpTypePipeStorage = 322, + SpvOpConstantPipeStorage = 323, + SpvOpCreatePipeFromPipeStorage = 324, + SpvOpGetKernelLocalSizeForSubgroupCount = 325, + SpvOpGetKernelMaxNumSubgroups = 326, + SpvOpTypeNamedBarrier = 327, + SpvOpNamedBarrierInitialize = 328, + SpvOpMemoryNamedBarrier = 329, + 
SpvOpModuleProcessed = 330, + SpvOpExecutionModeId = 331, + SpvOpDecorateId = 332, + SpvOpGroupNonUniformElect = 333, + SpvOpGroupNonUniformAll = 334, + SpvOpGroupNonUniformAny = 335, + SpvOpGroupNonUniformAllEqual = 336, + SpvOpGroupNonUniformBroadcast = 337, + SpvOpGroupNonUniformBroadcastFirst = 338, + SpvOpGroupNonUniformBallot = 339, + SpvOpGroupNonUniformInverseBallot = 340, + SpvOpGroupNonUniformBallotBitExtract = 341, + SpvOpGroupNonUniformBallotBitCount = 342, + SpvOpGroupNonUniformBallotFindLSB = 343, + SpvOpGroupNonUniformBallotFindMSB = 344, + SpvOpGroupNonUniformShuffle = 345, + SpvOpGroupNonUniformShuffleXor = 346, + SpvOpGroupNonUniformShuffleUp = 347, + SpvOpGroupNonUniformShuffleDown = 348, + SpvOpGroupNonUniformIAdd = 349, + SpvOpGroupNonUniformFAdd = 350, + SpvOpGroupNonUniformIMul = 351, + SpvOpGroupNonUniformFMul = 352, + SpvOpGroupNonUniformSMin = 353, + SpvOpGroupNonUniformUMin = 354, + SpvOpGroupNonUniformFMin = 355, + SpvOpGroupNonUniformSMax = 356, + SpvOpGroupNonUniformUMax = 357, + SpvOpGroupNonUniformFMax = 358, + SpvOpGroupNonUniformBitwiseAnd = 359, + SpvOpGroupNonUniformBitwiseOr = 360, + SpvOpGroupNonUniformBitwiseXor = 361, + SpvOpGroupNonUniformLogicalAnd = 362, + SpvOpGroupNonUniformLogicalOr = 363, + SpvOpGroupNonUniformLogicalXor = 364, + SpvOpGroupNonUniformQuadBroadcast = 365, + SpvOpGroupNonUniformQuadSwap = 366, + SpvOpCopyLogical = 400, + SpvOpPtrEqual = 401, + SpvOpPtrNotEqual = 402, + SpvOpPtrDiff = 403, + SpvOpTerminateInvocation = 4416, + SpvOpSubgroupBallotKHR = 4421, + SpvOpSubgroupFirstInvocationKHR = 4422, + SpvOpSubgroupAllKHR = 4428, + SpvOpSubgroupAnyKHR = 4429, + SpvOpSubgroupAllEqualKHR = 4430, + SpvOpGroupNonUniformRotateKHR = 4431, + SpvOpSubgroupReadInvocationKHR = 4432, + SpvOpTraceRayKHR = 4445, + SpvOpExecuteCallableKHR = 4446, + SpvOpConvertUToAccelerationStructureKHR = 4447, + SpvOpIgnoreIntersectionKHR = 4448, + SpvOpTerminateRayKHR = 4449, + SpvOpSDot = 4450, + SpvOpSDotKHR = 4450, + SpvOpUDot = 4451, + SpvOpUDotKHR = 4451, + SpvOpSUDot = 4452, + SpvOpSUDotKHR = 4452, + SpvOpSDotAccSat = 4453, + SpvOpSDotAccSatKHR = 4453, + SpvOpUDotAccSat = 4454, + SpvOpUDotAccSatKHR = 4454, + SpvOpSUDotAccSat = 4455, + SpvOpSUDotAccSatKHR = 4455, + SpvOpTypeRayQueryKHR = 4472, + SpvOpRayQueryInitializeKHR = 4473, + SpvOpRayQueryTerminateKHR = 4474, + SpvOpRayQueryGenerateIntersectionKHR = 4475, + SpvOpRayQueryConfirmIntersectionKHR = 4476, + SpvOpRayQueryProceedKHR = 4477, + SpvOpRayQueryGetIntersectionTypeKHR = 4479, + SpvOpGroupIAddNonUniformAMD = 5000, + SpvOpGroupFAddNonUniformAMD = 5001, + SpvOpGroupFMinNonUniformAMD = 5002, + SpvOpGroupUMinNonUniformAMD = 5003, + SpvOpGroupSMinNonUniformAMD = 5004, + SpvOpGroupFMaxNonUniformAMD = 5005, + SpvOpGroupUMaxNonUniformAMD = 5006, + SpvOpGroupSMaxNonUniformAMD = 5007, + SpvOpFragmentMaskFetchAMD = 5011, + SpvOpFragmentFetchAMD = 5012, + SpvOpReadClockKHR = 5056, + SpvOpImageSampleFootprintNV = 5283, + SpvOpEmitMeshTasksEXT = 5294, + SpvOpSetMeshOutputsEXT = 5295, + SpvOpGroupNonUniformPartitionNV = 5296, + SpvOpWritePackedPrimitiveIndices4x8NV = 5299, + SpvOpReportIntersectionKHR = 5334, + SpvOpReportIntersectionNV = 5334, + SpvOpIgnoreIntersectionNV = 5335, + SpvOpTerminateRayNV = 5336, + SpvOpTraceNV = 5337, + SpvOpTraceMotionNV = 5338, + SpvOpTraceRayMotionNV = 5339, + SpvOpTypeAccelerationStructureKHR = 5341, + SpvOpTypeAccelerationStructureNV = 5341, + SpvOpExecuteCallableNV = 5344, + SpvOpTypeCooperativeMatrixNV = 5358, + SpvOpCooperativeMatrixLoadNV = 5359, + 
SpvOpCooperativeMatrixStoreNV = 5360, + SpvOpCooperativeMatrixMulAddNV = 5361, + SpvOpCooperativeMatrixLengthNV = 5362, + SpvOpBeginInvocationInterlockEXT = 5364, + SpvOpEndInvocationInterlockEXT = 5365, + SpvOpDemoteToHelperInvocation = 5380, + SpvOpDemoteToHelperInvocationEXT = 5380, + SpvOpIsHelperInvocationEXT = 5381, + SpvOpConvertUToImageNV = 5391, + SpvOpConvertUToSamplerNV = 5392, + SpvOpConvertImageToUNV = 5393, + SpvOpConvertSamplerToUNV = 5394, + SpvOpConvertUToSampledImageNV = 5395, + SpvOpConvertSampledImageToUNV = 5396, + SpvOpSamplerImageAddressingModeNV = 5397, + SpvOpSubgroupShuffleINTEL = 5571, + SpvOpSubgroupShuffleDownINTEL = 5572, + SpvOpSubgroupShuffleUpINTEL = 5573, + SpvOpSubgroupShuffleXorINTEL = 5574, + SpvOpSubgroupBlockReadINTEL = 5575, + SpvOpSubgroupBlockWriteINTEL = 5576, + SpvOpSubgroupImageBlockReadINTEL = 5577, + SpvOpSubgroupImageBlockWriteINTEL = 5578, + SpvOpSubgroupImageMediaBlockReadINTEL = 5580, + SpvOpSubgroupImageMediaBlockWriteINTEL = 5581, + SpvOpUCountLeadingZerosINTEL = 5585, + SpvOpUCountTrailingZerosINTEL = 5586, + SpvOpAbsISubINTEL = 5587, + SpvOpAbsUSubINTEL = 5588, + SpvOpIAddSatINTEL = 5589, + SpvOpUAddSatINTEL = 5590, + SpvOpIAverageINTEL = 5591, + SpvOpUAverageINTEL = 5592, + SpvOpIAverageRoundedINTEL = 5593, + SpvOpUAverageRoundedINTEL = 5594, + SpvOpISubSatINTEL = 5595, + SpvOpUSubSatINTEL = 5596, + SpvOpIMul32x16INTEL = 5597, + SpvOpUMul32x16INTEL = 5598, + SpvOpConstantFunctionPointerINTEL = 5600, + SpvOpFunctionPointerCallINTEL = 5601, + SpvOpAsmTargetINTEL = 5609, + SpvOpAsmINTEL = 5610, + SpvOpAsmCallINTEL = 5611, + SpvOpAtomicFMinEXT = 5614, + SpvOpAtomicFMaxEXT = 5615, + SpvOpAssumeTrueKHR = 5630, + SpvOpExpectKHR = 5631, + SpvOpDecorateString = 5632, + SpvOpDecorateStringGOOGLE = 5632, + SpvOpMemberDecorateString = 5633, + SpvOpMemberDecorateStringGOOGLE = 5633, + SpvOpVmeImageINTEL = 5699, + SpvOpTypeVmeImageINTEL = 5700, + SpvOpTypeAvcImePayloadINTEL = 5701, + SpvOpTypeAvcRefPayloadINTEL = 5702, + SpvOpTypeAvcSicPayloadINTEL = 5703, + SpvOpTypeAvcMcePayloadINTEL = 5704, + SpvOpTypeAvcMceResultINTEL = 5705, + SpvOpTypeAvcImeResultINTEL = 5706, + SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL = 5707, + SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL = 5708, + SpvOpTypeAvcImeSingleReferenceStreaminINTEL = 5709, + SpvOpTypeAvcImeDualReferenceStreaminINTEL = 5710, + SpvOpTypeAvcRefResultINTEL = 5711, + SpvOpTypeAvcSicResultINTEL = 5712, + SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL = 5713, + SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL = 5714, + SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL = 5715, + SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL = 5716, + SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL = 5717, + SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL = 5718, + SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL = 5719, + SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL = 5720, + SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL = 5721, + SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL = 5722, + SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL = 5723, + SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL = 5724, + SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL = 5725, + SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL = 5726, + SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL = 5727, + SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL = 5728, + SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL = 5729, + 
SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL = 5730, + SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL = 5731, + SpvOpSubgroupAvcMceConvertToImePayloadINTEL = 5732, + SpvOpSubgroupAvcMceConvertToImeResultINTEL = 5733, + SpvOpSubgroupAvcMceConvertToRefPayloadINTEL = 5734, + SpvOpSubgroupAvcMceConvertToRefResultINTEL = 5735, + SpvOpSubgroupAvcMceConvertToSicPayloadINTEL = 5736, + SpvOpSubgroupAvcMceConvertToSicResultINTEL = 5737, + SpvOpSubgroupAvcMceGetMotionVectorsINTEL = 5738, + SpvOpSubgroupAvcMceGetInterDistortionsINTEL = 5739, + SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL = 5740, + SpvOpSubgroupAvcMceGetInterMajorShapeINTEL = 5741, + SpvOpSubgroupAvcMceGetInterMinorShapeINTEL = 5742, + SpvOpSubgroupAvcMceGetInterDirectionsINTEL = 5743, + SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL = 5744, + SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL = 5745, + SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL = 5746, + SpvOpSubgroupAvcImeInitializeINTEL = 5747, + SpvOpSubgroupAvcImeSetSingleReferenceINTEL = 5748, + SpvOpSubgroupAvcImeSetDualReferenceINTEL = 5749, + SpvOpSubgroupAvcImeRefWindowSizeINTEL = 5750, + SpvOpSubgroupAvcImeAdjustRefOffsetINTEL = 5751, + SpvOpSubgroupAvcImeConvertToMcePayloadINTEL = 5752, + SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL = 5753, + SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL = 5754, + SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL = 5755, + SpvOpSubgroupAvcImeSetWeightedSadINTEL = 5756, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL = 5757, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL = 5758, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL = 5759, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL = 5760, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL = 5761, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL = 5762, + SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL = 5763, + SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL = 5764, + SpvOpSubgroupAvcImeConvertToMceResultINTEL = 5765, + SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL = 5766, + SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL = 5767, + SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL = 5768, + SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL = 5769, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL = 5770, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL = 5771, + SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL = 5772, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL = 5773, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL = 5774, + SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL = 5775, + SpvOpSubgroupAvcImeGetBorderReachedINTEL = 5776, + SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL = 5777, + SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL = 5778, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL = 5779, + SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL = 5780, + SpvOpSubgroupAvcFmeInitializeINTEL = 5781, + SpvOpSubgroupAvcBmeInitializeINTEL = 5782, + SpvOpSubgroupAvcRefConvertToMcePayloadINTEL = 5783, + SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL = 5784, + SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL = 5785, + SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL = 5786, + 
SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL = 5787, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL = 5788, + SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL = 5789, + SpvOpSubgroupAvcRefConvertToMceResultINTEL = 5790, + SpvOpSubgroupAvcSicInitializeINTEL = 5791, + SpvOpSubgroupAvcSicConfigureSkcINTEL = 5792, + SpvOpSubgroupAvcSicConfigureIpeLumaINTEL = 5793, + SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL = 5794, + SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL = 5795, + SpvOpSubgroupAvcSicConvertToMcePayloadINTEL = 5796, + SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL = 5797, + SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL = 5798, + SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL = 5799, + SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL = 5800, + SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL = 5801, + SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL = 5802, + SpvOpSubgroupAvcSicEvaluateIpeINTEL = 5803, + SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL = 5804, + SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL = 5805, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL = 5806, + SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL = 5807, + SpvOpSubgroupAvcSicConvertToMceResultINTEL = 5808, + SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL = 5809, + SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL = 5810, + SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL = 5811, + SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL = 5812, + SpvOpSubgroupAvcSicGetIpeChromaModeINTEL = 5813, + SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL = 5814, + SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL = 5815, + SpvOpSubgroupAvcSicGetInterRawSadsINTEL = 5816, + SpvOpVariableLengthArrayINTEL = 5818, + SpvOpSaveMemoryINTEL = 5819, + SpvOpRestoreMemoryINTEL = 5820, + SpvOpArbitraryFloatSinCosPiINTEL = 5840, + SpvOpArbitraryFloatCastINTEL = 5841, + SpvOpArbitraryFloatCastFromIntINTEL = 5842, + SpvOpArbitraryFloatCastToIntINTEL = 5843, + SpvOpArbitraryFloatAddINTEL = 5846, + SpvOpArbitraryFloatSubINTEL = 5847, + SpvOpArbitraryFloatMulINTEL = 5848, + SpvOpArbitraryFloatDivINTEL = 5849, + SpvOpArbitraryFloatGTINTEL = 5850, + SpvOpArbitraryFloatGEINTEL = 5851, + SpvOpArbitraryFloatLTINTEL = 5852, + SpvOpArbitraryFloatLEINTEL = 5853, + SpvOpArbitraryFloatEQINTEL = 5854, + SpvOpArbitraryFloatRecipINTEL = 5855, + SpvOpArbitraryFloatRSqrtINTEL = 5856, + SpvOpArbitraryFloatCbrtINTEL = 5857, + SpvOpArbitraryFloatHypotINTEL = 5858, + SpvOpArbitraryFloatSqrtINTEL = 5859, + SpvOpArbitraryFloatLogINTEL = 5860, + SpvOpArbitraryFloatLog2INTEL = 5861, + SpvOpArbitraryFloatLog10INTEL = 5862, + SpvOpArbitraryFloatLog1pINTEL = 5863, + SpvOpArbitraryFloatExpINTEL = 5864, + SpvOpArbitraryFloatExp2INTEL = 5865, + SpvOpArbitraryFloatExp10INTEL = 5866, + SpvOpArbitraryFloatExpm1INTEL = 5867, + SpvOpArbitraryFloatSinINTEL = 5868, + SpvOpArbitraryFloatCosINTEL = 5869, + SpvOpArbitraryFloatSinCosINTEL = 5870, + SpvOpArbitraryFloatSinPiINTEL = 5871, + SpvOpArbitraryFloatCosPiINTEL = 5872, + SpvOpArbitraryFloatASinINTEL = 5873, + SpvOpArbitraryFloatASinPiINTEL = 5874, + SpvOpArbitraryFloatACosINTEL = 5875, + SpvOpArbitraryFloatACosPiINTEL = 5876, + SpvOpArbitraryFloatATanINTEL = 5877, + SpvOpArbitraryFloatATanPiINTEL = 5878, + SpvOpArbitraryFloatATan2INTEL = 5879, + SpvOpArbitraryFloatPowINTEL = 5880, + SpvOpArbitraryFloatPowRINTEL = 5881, + SpvOpArbitraryFloatPowNINTEL = 5882, + SpvOpLoopControlINTEL = 5887, + SpvOpAliasDomainDeclINTEL = 5911, + SpvOpAliasScopeDeclINTEL = 5912, + 
SpvOpAliasScopeListDeclINTEL = 5913, + SpvOpFixedSqrtINTEL = 5923, + SpvOpFixedRecipINTEL = 5924, + SpvOpFixedRsqrtINTEL = 5925, + SpvOpFixedSinINTEL = 5926, + SpvOpFixedCosINTEL = 5927, + SpvOpFixedSinCosINTEL = 5928, + SpvOpFixedSinPiINTEL = 5929, + SpvOpFixedCosPiINTEL = 5930, + SpvOpFixedSinCosPiINTEL = 5931, + SpvOpFixedLogINTEL = 5932, + SpvOpFixedExpINTEL = 5933, + SpvOpPtrCastToCrossWorkgroupINTEL = 5934, + SpvOpCrossWorkgroupCastToPtrINTEL = 5938, + SpvOpReadPipeBlockingINTEL = 5946, + SpvOpWritePipeBlockingINTEL = 5947, + SpvOpFPGARegINTEL = 5949, + SpvOpRayQueryGetRayTMinKHR = 6016, + SpvOpRayQueryGetRayFlagsKHR = 6017, + SpvOpRayQueryGetIntersectionTKHR = 6018, + SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR = 6019, + SpvOpRayQueryGetIntersectionInstanceIdKHR = 6020, + SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR = 6021, + SpvOpRayQueryGetIntersectionGeometryIndexKHR = 6022, + SpvOpRayQueryGetIntersectionPrimitiveIndexKHR = 6023, + SpvOpRayQueryGetIntersectionBarycentricsKHR = 6024, + SpvOpRayQueryGetIntersectionFrontFaceKHR = 6025, + SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR = 6026, + SpvOpRayQueryGetIntersectionObjectRayDirectionKHR = 6027, + SpvOpRayQueryGetIntersectionObjectRayOriginKHR = 6028, + SpvOpRayQueryGetWorldRayDirectionKHR = 6029, + SpvOpRayQueryGetWorldRayOriginKHR = 6030, + SpvOpRayQueryGetIntersectionObjectToWorldKHR = 6031, + SpvOpRayQueryGetIntersectionWorldToObjectKHR = 6032, + SpvOpAtomicFAddEXT = 6035, + SpvOpTypeBufferSurfaceINTEL = 6086, + SpvOpTypeStructContinuedINTEL = 6090, + SpvOpConstantCompositeContinuedINTEL = 6091, + SpvOpSpecConstantCompositeContinuedINTEL = 6092, + SpvOpControlBarrierArriveINTEL = 6142, + SpvOpControlBarrierWaitINTEL = 6143, + SpvOpGroupIMulKHR = 6401, + SpvOpGroupFMulKHR = 6402, + SpvOpGroupBitwiseAndKHR = 6403, + SpvOpGroupBitwiseOrKHR = 6404, + SpvOpGroupBitwiseXorKHR = 6405, + SpvOpGroupLogicalAndKHR = 6406, + SpvOpGroupLogicalOrKHR = 6407, + SpvOpGroupLogicalXorKHR = 6408, + SpvOpMax = 0x7fffffff, +} SpvOp; + +#ifdef SPV_ENABLE_UTILITY_CODE +#ifndef __cplusplus +#include <stdbool.h> +#endif +inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultType) { + *hasResult = *hasResultType = false; + switch (opcode) { + default: /* unknown opcode */ break; + case SpvOpNop: *hasResult = false; *hasResultType = false; break; + case SpvOpUndef: *hasResult = true; *hasResultType = true; break; + case SpvOpSourceContinued: *hasResult = false; *hasResultType = false; break; + case SpvOpSource: *hasResult = false; *hasResultType = false; break; + case SpvOpSourceExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpName: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberName: *hasResult = false; *hasResultType = false; break; + case SpvOpString: *hasResult = true; *hasResultType = false; break; + case SpvOpLine: *hasResult = false; *hasResultType = false; break; + case SpvOpExtension: *hasResult = false; *hasResultType = false; break; + case SpvOpExtInstImport: *hasResult = true; *hasResultType = false; break; + case SpvOpExtInst: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryModel: *hasResult = false; *hasResultType = false; break; + case SpvOpEntryPoint: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionMode: *hasResult = false; *hasResultType = false; break; + case SpvOpCapability: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeVoid: *hasResult = true; *hasResultType = false; break; +
case SpvOpTypeBool: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeInt: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFloat: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeVector: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeMatrix: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampler: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeSampledImage: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeRuntimeArray: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeStruct: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeOpaque: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePointer: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeFunction: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeDeviceEvent: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeReserveId: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeQueue: *hasResult = true; *hasResultType = false; break; + case SpvOpTypePipe: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeForwardPointer: *hasResult = false; *hasResultType = false; break; + case SpvOpConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantSampler: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantNull: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantTrue: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantFalse: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstant: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantComposite: *hasResult = true; *hasResultType = true; break; + case SpvOpSpecConstantOp: *hasResult = true; *hasResultType = true; break; + case SpvOpFunction: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionParameter: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionEnd: *hasResult = false; *hasResultType = false; break; + case SpvOpFunctionCall: *hasResult = true; *hasResultType = true; break; + case SpvOpVariable: *hasResult = true; *hasResultType = true; break; + case SpvOpImageTexelPointer: *hasResult = true; *hasResultType = true; break; + case SpvOpLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpStore: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemory: *hasResult = false; *hasResultType = false; break; + case SpvOpCopyMemorySized: *hasResult = false; *hasResultType = false; break; + case SpvOpAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrAccessChain: *hasResult = true; *hasResultType = true; break; + case SpvOpArrayLength: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericPtrMemSemantics: *hasResult = true; *hasResultType = true; break; + case SpvOpInBoundsPtrAccessChain: *hasResult = true; 
*hasResultType = true; break; + case SpvOpDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorationGroup: *hasResult = true; *hasResultType = false; break; + case SpvOpGroupDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupMemberDecorate: *hasResult = false; *hasResultType = false; break; + case SpvOpVectorExtractDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorInsertDynamic: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeConstruct: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpCompositeInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyObject: *hasResult = true; *hasResultType = true; break; + case SpvOpTranspose: *hasResult = true; *hasResultType = true; break; + case SpvOpSampledImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageRead: *hasResult = true; *hasResultType = true; break; + case SpvOpImageWrite: *hasResult = false; *hasResultType = false; break; + case SpvOpImage: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryFormat: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryOrder: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySizeLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySize: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQueryLevels: *hasResult = true; *hasResultType = true; break; + case SpvOpImageQuerySamples: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToU: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertFToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSToF: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToF: *hasResult = true; *hasResultType = true; break; + case SpvOpUConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpSConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpFConvert: *hasResult = true; *hasResultType = true; break; + case SpvOpQuantizeToF16: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertPtrToU: *hasResult = true; *hasResultType = true; break; + case SpvOpSatConvertSToU: *hasResult = true; *hasResultType = true; 
break; + case SpvOpSatConvertUToS: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToGeneric: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtr: *hasResult = true; *hasResultType = true; break; + case SpvOpGenericCastToPtrExplicit: *hasResult = true; *hasResultType = true; break; + case SpvOpBitcast: *hasResult = true; *hasResultType = true; break; + case SpvOpSNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpFNegate: *hasResult = true; *hasResultType = true; break; + case SpvOpIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpISub: *hasResult = true; *hasResultType = true; break; + case SpvOpFSub: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpUDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpSDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpFDiv: *hasResult = true; *hasResultType = true; break; + case SpvOpUMod: *hasResult = true; *hasResultType = true; break; + case SpvOpSRem: *hasResult = true; *hasResultType = true; break; + case SpvOpSMod: *hasResult = true; *hasResultType = true; break; + case SpvOpFRem: *hasResult = true; *hasResultType = true; break; + case SpvOpFMod: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesScalar: *hasResult = true; *hasResultType = true; break; + case SpvOpVectorTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesVector: *hasResult = true; *hasResultType = true; break; + case SpvOpMatrixTimesMatrix: *hasResult = true; *hasResultType = true; break; + case SpvOpOuterProduct: *hasResult = true; *hasResultType = true; break; + case SpvOpDot: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddCarry: *hasResult = true; *hasResultType = true; break; + case SpvOpISubBorrow: *hasResult = true; *hasResultType = true; break; + case SpvOpUMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpSMulExtended: *hasResult = true; *hasResultType = true; break; + case SpvOpAny: *hasResult = true; *hasResultType = true; break; + case SpvOpAll: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNan: *hasResult = true; *hasResultType = true; break; + case SpvOpIsInf: *hasResult = true; *hasResultType = true; break; + case SpvOpIsFinite: *hasResult = true; *hasResultType = true; break; + case SpvOpIsNormal: *hasResult = true; *hasResultType = true; break; + case SpvOpSignBitSet: *hasResult = true; *hasResultType = true; break; + case SpvOpLessOrGreater: *hasResult = true; *hasResultType = true; break; + case SpvOpOrdered: *hasResult = true; *hasResultType = true; break; + case SpvOpUnordered: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalOr: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpLogicalNot: *hasResult = true; *hasResultType = true; break; + case SpvOpSelect: *hasResult = true; *hasResultType = true; break; + case SpvOpIEqual: 
*hasResult = true; *hasResultType = true; break; + case SpvOpINotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpUGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpULessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpSLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThan: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordLessThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFOrdGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpFUnordGreaterThanEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftRightArithmetic: *hasResult = true; *hasResultType = true; break; + case SpvOpShiftLeftLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpNot: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldInsert: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldSExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitFieldUExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpBitReverse: *hasResult = true; *hasResultType = true; break; + case SpvOpBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdx: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdy: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidth: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyFine: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthFine: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdxCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpDPdyCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpFwidthCoarse: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpEmitStreamVertex: *hasResult = false; *hasResultType = false; break; + case SpvOpEndStreamPrimitive: *hasResult = false; *hasResultType = false; break; + case SpvOpControlBarrier: 
*hasResult = false; *hasResultType = false; break; + case SpvOpMemoryBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicLoad: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicStore: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchange: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicCompareExchangeWeak: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIIncrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIDecrement: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicISub: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicOr: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicXor: *hasResult = true; *hasResultType = true; break; + case SpvOpPhi: *hasResult = true; *hasResultType = true; break; + case SpvOpLoopMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpSelectionMerge: *hasResult = false; *hasResultType = false; break; + case SpvOpLabel: *hasResult = true; *hasResultType = false; break; + case SpvOpBranch: *hasResult = false; *hasResultType = false; break; + case SpvOpBranchConditional: *hasResult = false; *hasResultType = false; break; + case SpvOpSwitch: *hasResult = false; *hasResultType = false; break; + case SpvOpKill: *hasResult = false; *hasResultType = false; break; + case SpvOpReturn: *hasResult = false; *hasResultType = false; break; + case SpvOpReturnValue: *hasResult = false; *hasResultType = false; break; + case SpvOpUnreachable: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStart: *hasResult = false; *hasResultType = false; break; + case SpvOpLifetimeStop: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAsyncCopy: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupWaitEvents: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedReadPipe: *hasResult = true; *hasResultType = true; break; + case SpvOpReservedWritePipe: *hasResult = true; *hasResultType = true; break; + 
case SpvOpReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpIsValidReserveId: *hasResult = true; *hasResultType = true; break; + case SpvOpGetNumPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGetMaxPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveReadPipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupReserveWritePipePackets: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupCommitReadPipe: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupCommitWritePipe: *hasResult = false; *hasResultType = false; break; + case SpvOpEnqueueMarker: *hasResult = true; *hasResultType = true; break; + case SpvOpEnqueueKernel: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeSubGroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelNDrangeMaxSubGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelWorkGroupSize: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelPreferredWorkGroupSizeMultiple: *hasResult = true; *hasResultType = true; break; + case SpvOpRetainEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpReleaseEvent: *hasResult = false; *hasResultType = false; break; + case SpvOpCreateUserEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpIsValidEvent: *hasResult = true; *hasResultType = true; break; + case SpvOpSetUserEventStatus: *hasResult = false; *hasResultType = false; break; + case SpvOpCaptureEventProfilingInfo: *hasResult = false; *hasResultType = false; break; + case SpvOpGetDefaultQueue: *hasResult = true; *hasResultType = true; break; + case SpvOpBuildNDRange: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefImplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseSampleProjDrefExplicitLod: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseFetch: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseDrefGather: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSparseTexelsResident: *hasResult = true; *hasResultType = true; break; + case SpvOpNoLine: *hasResult = false; *hasResultType = false; break; + case SpvOpAtomicFlagTestAndSet: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFlagClear: *hasResult = false; *hasResultType = false; break; + case SpvOpImageSparseRead: *hasResult = true; *hasResultType = true; break; + case SpvOpSizeOf: *hasResult = true; *hasResultType = true; break; + case 
SpvOpTypePipeStorage: *hasResult = true; *hasResultType = false; break; + case SpvOpConstantPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpCreatePipeFromPipeStorage: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelLocalSizeForSubgroupCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGetKernelMaxNumSubgroups: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeNamedBarrier: *hasResult = true; *hasResultType = false; break; + case SpvOpNamedBarrierInitialize: *hasResult = true; *hasResultType = true; break; + case SpvOpMemoryNamedBarrier: *hasResult = false; *hasResultType = false; break; + case SpvOpModuleProcessed: *hasResult = false; *hasResultType = false; break; + case SpvOpExecutionModeId: *hasResult = false; *hasResultType = false; break; + case SpvOpDecorateId: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformElect: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAll: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAny: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformAllEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBroadcastFirst: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformInverseBallot: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitExtract: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotBitCount: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindLSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBallotFindMSB: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffle: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleUp: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformShuffleDown: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFAdd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformIMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMul: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMin: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformSMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformUMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformFMax: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseOr: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformBitwiseXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalAnd: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformLogicalOr: *hasResult = true; *hasResultType 
= true; break; + case SpvOpGroupNonUniformLogicalXor: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadBroadcast: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformQuadSwap: *hasResult = true; *hasResultType = true; break; + case SpvOpCopyLogical: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrNotEqual: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrDiff: *hasResult = true; *hasResultType = true; break; + case SpvOpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpTraceRayKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpSDot: *hasResult = true; *hasResultType = true; break; + case SpvOpUDot: *hasResult = true; *hasResultType = true; break; + case SpvOpSUDot: *hasResult = true; *hasResultType = true; break; + case SpvOpSDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpUDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; + case SpvOpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryGenerateIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryConfirmIntersectionKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpRayQueryProceedKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTypeKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupIAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFAddNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMinNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupFMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupUMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; + case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case 
SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; + case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case SpvOpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTerminateRayNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceMotionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case SpvOpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case SpvOpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; + case SpvOpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; + case SpvOpCooperativeMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case SpvOpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; + case SpvOpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; + case SpvOpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSamplerToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; + case SpvOpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; + case SpvOpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupShuffleXorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpSubgroupImageMediaBlockReadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupImageMediaBlockWriteINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpUCountLeadingZerosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUCountTrailingZerosINTEL: 
*hasResult = true; *hasResultType = true; break; + case SpvOpAbsISubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAbsUSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAddSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUAverageRoundedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpISubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUSubSatINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpIMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpAtomicFMaxEXT: *hasResult = true; *hasResultType = true; break; + case SpvOpAssumeTrueKHR: *hasResult = false; *hasResultType = false; break; + case SpvOpExpectKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpMemberDecorateString: *hasResult = false; *hasResultType = false; break; + case SpvOpVmeImageINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpTypeVmeImageINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicPayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMcePayloadINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcMceResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcImeDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcRefResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpTypeAvcSicResultINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToImeResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToRefResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicPayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceConvertToSicResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMajorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMinorShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterDirectionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcImeRefWindowSizeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeAdjustRefOffsetINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeSetWeightedSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetBorderReachedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcFmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcBmeInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcRefConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicInitializeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureSkcINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMcePayloadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateIpeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicConvertToMceResultINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetIpeChromaModeINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case 
SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSubgroupAvcSicGetInterRawSadsINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpVariableLengthArrayINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpSaveMemoryINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRestoreMemoryINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpArbitraryFloatSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastFromIntINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCastToIntINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatAddINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSubINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatMulINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatDivINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatGTINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatGEINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLTINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLEINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatEQINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatRecipINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatRSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCbrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatHypotINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLogINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog10INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatLog1pINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExp2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExp10INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatExpm1INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatASinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatASinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatACosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatACosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATanINTEL: *hasResult = true; *hasResultType = true; 
break; + case SpvOpArbitraryFloatATanPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatATan2INTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowRINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpArbitraryFloatPowNINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpLoopControlINTEL: *hasResult = false; *hasResultType = false; break; + case SpvOpAliasDomainDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpAliasScopeListDeclINTEL: *hasResult = true; *hasResultType = false; break; + case SpvOpFixedSqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedRecipINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedRsqrtINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinCosINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedSinCosPiINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedLogINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFixedExpINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpPtrCastToCrossWorkgroupINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpCrossWorkgroupCastToPtrINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpReadPipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpWritePipeBlockingINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpFPGARegINTEL: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayTMinKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetRayFlagsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionTKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceIdKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionGeometryIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionPrimitiveIndexKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionBarycentricsKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionFrontFaceKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetIntersectionObjectRayOriginKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayDirectionKHR: *hasResult = true; *hasResultType = true; break; + case SpvOpRayQueryGetWorldRayOriginKHR: *hasResult = true; *hasResultType = true; break; 
+    case SpvOpRayQueryGetIntersectionObjectToWorldKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpRayQueryGetIntersectionWorldToObjectKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpAtomicFAddEXT: *hasResult = true; *hasResultType = true; break;
+    case SpvOpTypeBufferSurfaceINTEL: *hasResult = true; *hasResultType = false; break;
+    case SpvOpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break;
+    case SpvOpGroupIMulKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupFMulKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupBitwiseOrKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupBitwiseXorKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break;
+    case SpvOpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break;
+    }
+}
+#endif /* SPV_ENABLE_UTILITY_CODE */
+
+#endif
diff --git a/python_bindings/apps/CMakeLists.txt b/python_bindings/apps/CMakeLists.txt
index fd4298975a4b..1709ea434e9e 100644
--- a/python_bindings/apps/CMakeLists.txt
+++ b/python_bindings/apps/CMakeLists.txt
@@ -3,6 +3,11 @@ if (TARGET_WEBASSEMBLY AND Halide_TARGET MATCHES "wasm")
     return()
 endif ()
 
+if (TARGET_VULKAN AND Halide_TARGET MATCHES "vulkan")
+    message(WARNING "Python apps are skipped under Vulkan.")
+    return()
+endif ()
+
 set(TEST_TMPDIR "$")
 set(TEST_IMAGES_DIR "$")
 
diff --git a/python_bindings/src/halide/halide_/PyEnums.cpp b/python_bindings/src/halide/halide_/PyEnums.cpp
index 17d75e0565de..4cea2899fcf1 100644
--- a/python_bindings/src/halide/halide_/PyEnums.cpp
+++ b/python_bindings/src/halide/halide_/PyEnums.cpp
@@ -23,6 +23,7 @@ void define_enums(py::module &m) {
         .value("Host", DeviceAPI::Host)
         .value("Default_GPU", DeviceAPI::Default_GPU)
         .value("CUDA", DeviceAPI::CUDA)
+        .value("Vulkan", DeviceAPI::Vulkan)
         .value("OpenCL", DeviceAPI::OpenCL)
         .value("OpenGLCompute", DeviceAPI::OpenGLCompute)
         .value("Metal", DeviceAPI::Metal)
@@ -178,6 +179,15 @@ void define_enums(py::module &m) {
         .value("SanitizerCoverage", Target::Feature::SanitizerCoverage)
         .value("ProfileByTimer", Target::Feature::ProfileByTimer)
         .value("SPIRV", Target::Feature::SPIRV)
+        .value("Vulkan", Target::Feature::Vulkan)
+        .value("VulkanInt8", Target::Feature::VulkanInt8)
+        .value("VulkanInt16", Target::Feature::VulkanInt16)
+        .value("VulkanInt64", Target::Feature::VulkanInt64)
+        .value("VulkanFloat16", Target::Feature::VulkanFloat16)
+        .value("VulkanFloat64", Target::Feature::VulkanFloat64)
+        .value("VulkanV10", Target::Feature::VulkanV10)
+        .value("VulkanV12", Target::Feature::VulkanV12)
+        .value("VulkanV13", Target::Feature::VulkanV13)
         .value("Semihosting", Target::Feature::Semihosting)
         .value("FeatureEnd", Target::Feature::FeatureEnd);
 
diff --git a/python_bindings/tutorial/CMakeLists.txt b/python_bindings/tutorial/CMakeLists.txt
index 9739e07cda1f..1f9fa17f3e0f 100644
--- a/python_bindings/tutorial/CMakeLists.txt
+++ b/python_bindings/tutorial/CMakeLists.txt
@@ -29,6 +29,11 @@ foreach
(test IN LISTS tests) continue() endif () + if (TARGET_VULKAN AND Halide_TARGET MATCHES "vulkan" AND test MATCHES "lesson_10") + message(WARNING "Not all tutorials build under Vulkan.") + continue() + endif () + cmake_path(GET test STEM test_name) add_python_test( FILE "${test}" @@ -39,6 +44,8 @@ endforeach () if (TARGET_WEBASSEMBLY AND Halide_TARGET MATCHES "wasm") message(WARNING "Not all tutorials build under WASM.") +elseif (TARGET_VULKAN AND Halide_TARGET MATCHES "vulkan") + message(WARNING "Not all tutorials build under Vulkan.") else () ## Add some hacks for getting CMake to delay compiling lesson_10_halide until after the test has run. The "better" way ## of doing this might be to treat lesson 10 like an app and give it its own CMakeLists.txt, but since this is a one-off diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2d672db7d73c..d7bf874d56b2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -39,6 +39,7 @@ set(HEADER_FILES CodeGen_PTX_Dev.h CodeGen_PyTorch.h CodeGen_Targets.h + CodeGen_Vulkan_Dev.h CodeGen_WebGPU_Dev.h CompilerLogger.h ConciseCasts.h @@ -207,6 +208,7 @@ set(SOURCE_FILES CodeGen_PTX_Dev.cpp CodeGen_PyTorch.cpp CodeGen_RISCV.cpp + CodeGen_Vulkan_Dev.cpp CodeGen_WebAssembly.cpp CodeGen_WebGPU_Dev.cpp CodeGen_X86.cpp @@ -441,6 +443,13 @@ set_target_properties(Halide PROPERTIES VERSION ${Halide_VERSION} SOVERSION ${Halide_SOVERSION_OVERRIDE}) +target_compile_definitions(Halide PUBLIC + HALIDE_VERSION=${Halide_VERSION} + HALIDE_VERSION_MAJOR=${Halide_VERSION_MAJOR} + HALIDE_VERSION_MINOR=${Halide_VERSION_MINOR} + HALIDE_VERSION_PATCH=${Halide_VERSION_PATCH}) + + target_include_directories(Halide INTERFACE "$") add_dependencies(Halide HalideIncludes) @@ -570,6 +579,18 @@ if (TARGET_OPENGLCOMPUTE) target_compile_definitions(Halide PRIVATE WITH_OPENGLCOMPUTE) endif () +if (TARGET_VULKAN) + message(STATUS "Enabling Vulkan target") + target_compile_definitions(Halide PRIVATE WITH_VULKAN) +endif() + +if (TARGET_SPIRV) + # Our vendored SPIRV headers are only used internally; users do not need + # them installed. + target_compile_definitions(Halide PRIVATE WITH_SPIRV) + target_link_libraries(Halide PRIVATE "$") +endif () + option(TARGET_WEBGPU "Include WebGPU target" ON) if (TARGET_WEBGPU) target_compile_definitions(Halide PRIVATE WITH_WEBGPU) @@ -582,12 +603,6 @@ if (TARGET_WEBGPU) endif () endif() -if (TARGET_SPIRV) - # Our vendored SPIRV headers are only used internally; users do not need - # them installed. - target_compile_definitions(Halide PRIVATE WITH_SPIRV) - target_link_libraries(Halide PRIVATE "$") -endif () ## # Add autoschedulers to the build. 
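With the build wiring above in place, Vulkan is opted into like any other GPU backend: through Target features (or a target string such as "host-vulkan"). A minimal JIT-side sketch, assuming a working Vulkan loader and driver at runtime; the pipeline and schedule here are illustrative only:

    #include "Halide.h"
    using namespace Halide;

    int main() {
        // Select Vulkan via the usual Target feature mechanism.
        Target t = get_host_target().with_feature(Target::Vulkan);

        Func f("f");
        Var x("x"), y("y"), xo("xo"), yo("yo"), xi("xi"), yi("yi");
        f(x, y) = cast<float>(x + y);

        // Map the pure loops onto the GPU grid with 8x8 workgroups.
        f.gpu_tile(x, y, xo, yo, xi, yi, 8, 8);

        Buffer<float> out = f.realize({256, 256}, t);
        return 0;
    }

Kernels that use narrow or wide types would additionally need the corresponding capability features added in this patch (e.g. Target::VulkanInt8, Target::VulkanFloat64) so that the matching SPIR-V capabilities are declared.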
diff --git a/src/CodeGen_Internal.cpp b/src/CodeGen_Internal.cpp
index 2092912412dd..b1f13d424979 100644
--- a/src/CodeGen_Internal.cpp
+++ b/src/CodeGen_Internal.cpp
@@ -67,6 +67,7 @@ bool function_takes_user_context(const std::string &name) {
         "halide_openglcompute_run",
         "halide_metal_run",
         "halide_d3d12compute_run",
+        "halide_vulkan_run",
         "halide_webgpu_run",
         "halide_msan_annotate_buffer_is_initialized_as_destructor",
         "halide_msan_annotate_buffer_is_initialized",
@@ -92,6 +93,7 @@ bool function_takes_user_context(const std::string &name) {
         "halide_openglcompute_initialize_kernels",
         "halide_metal_initialize_kernels",
         "halide_d3d12compute_initialize_kernels",
+        "halide_vulkan_initialize_kernels",
         "halide_webgpu_initialize_kernels",
         "halide_get_gpu_device",
         "_halide_buffer_crop",
diff --git a/src/CodeGen_Vulkan_Dev.cpp b/src/CodeGen_Vulkan_Dev.cpp
new file mode 100644
index 000000000000..ce6e16d44316
--- /dev/null
+++ b/src/CodeGen_Vulkan_Dev.cpp
@@ -0,0 +1,2928 @@
+#include
+#include <fstream>  // for dump to file
+#include
+#include
+
+#include "CSE.h"
+#include "CodeGen_GPU_Dev.h"
+#include "CodeGen_Internal.h"
+#include "CodeGen_Vulkan_Dev.h"
+#include "Debug.h"
+#include "Deinterleave.h"
+#include "FindIntrinsics.h"
+#include "IROperator.h"
+#include "IRPrinter.h"
+#include "Scope.h"
+#include "Simplify.h"
+#include "SpirvIR.h"
+#include "Target.h"
+
+#ifdef WITH_SPIRV
+
+namespace Halide {
+namespace Internal {
+
+namespace {  // anonymous
+
+// --
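+// CodeGen_Vulkan_Dev implements the CodeGen_GPU_Dev interface for Vulkan.
+// The heavy lifting is done by the nested SPIRV_Emitter IRVisitor declared
+// below, which walks each kernel's Stmt and emits SPIR-V instructions via
+// the SpvBuilder/SpvFactory helpers from SpirvIR.h.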
+class CodeGen_Vulkan_Dev : public CodeGen_GPU_Dev {
+public:
+    CodeGen_Vulkan_Dev(Target target);
+
+    /** Compile a GPU kernel into the module. This may be called many times
+     * with different kernels, which will all be accumulated into a single
+     * source module shared by a given Halide pipeline. */
+    void add_kernel(Stmt stmt,
+                    const std::string &name,
+                    const std::vector<DeviceArgument> &args) override;
+
+    /** (Re)initialize the GPU kernel module. This is separate from compile,
+     * since a GPU device module will often have many kernels compiled into it
+     * for a single pipeline. */
+    void init_module() override;
+
+    std::vector<char> compile_to_src() override;
+
+    std::string get_current_kernel_name() override;
+
+    void dump() override;
+
+    std::string print_gpu_name(const std::string &name) override;
+
+    std::string api_unique_name() override {
+        return "vulkan";
+    }
+
+protected:
+    class SPIRV_Emitter : public IRVisitor {
+
+    public:
+        SPIRV_Emitter(Target t);
+
+        using IRVisitor::visit;
+
+        void visit(const IntImm *) override;
+        void visit(const UIntImm *) override;
+        void visit(const FloatImm *) override;
+        void visit(const StringImm *) override;
+        void visit(const Cast *) override;
+        void visit(const Reinterpret *) override;
+        void visit(const Variable *) override;
+        void visit(const Add *) override;
+        void visit(const Sub *) override;
+        void visit(const Mul *) override;
+        void visit(const Div *) override;
+        void visit(const Mod *) override;
+        void visit(const Min *) override;
+        void visit(const Max *) override;
+        void visit(const EQ *) override;
+        void visit(const NE *) override;
+        void visit(const LT *) override;
+        void visit(const LE *) override;
+        void visit(const GT *) override;
+        void visit(const GE *) override;
+        void visit(const And *) override;
+        void visit(const Or *) override;
+        void visit(const Not *) override;
+        void visit(const Select *) override;
+        void visit(const Load *) override;
+        void visit(const Ramp *) override;
+        void visit(const Broadcast *) override;
+        void visit(const Call *) override;
+        void visit(const Let *) override;
+        void visit(const LetStmt *) override;
+        void visit(const AssertStmt *) override;
+        void visit(const For *) override;
+        void visit(const Store *) override;
+        void visit(const Provide *) override;
+        void visit(const Allocate *) override;
+        void visit(const Free *) override;
+        void visit(const Realize *) override;
+        void visit(const ProducerConsumer *op) override;
+        void visit(const IfThenElse *) override;
+        void visit(const Evaluate *) override;
+        void visit(const Shuffle *) override;
+        void visit(const VectorReduce *) override;
+        void visit(const Prefetch *) override;
+        void visit(const Fork *) override;
+        void visit(const Acquire *) override;
+        void visit(const Atomic *) override;
+
+        void reset();
+
+        // Top-level function for adding kernels
+        void add_kernel(const Stmt &s, const std::string &name, const std::vector<DeviceArgument> &args);
+        void init_module();
+        void compile(std::vector<char> &binary);
+        void dump() const;
+
+        // Encode the descriptor sets into a sidecar which will be added
+        // as a header to the module prior to the actual SPIR-V binary
+        void encode_header(SpvBinary &spirv_header);
+
+        // Scalarize expressions
+        void scalarize(const Expr &e);
+        SpvId map_type_to_pair(const Type &t);
+
+        // Workgroup size
+        void reset_workgroup_size();
+        void find_workgroup_size(const Stmt &s);
+
+        void declare_workgroup_size(SpvId kernel_func_id);
+        void declare_entry_point(const Stmt &s, SpvId kernel_func_id);
+        void declare_device_args(const Stmt &s, uint32_t entry_point_index, const std::string &kernel_name, const std::vector<DeviceArgument> &args);
+
+        // Common operator visitors
+        void visit_unary_op(SpvOp op_code, Type t, const Expr &a);
+        void visit_binary_op(SpvOp op_code, Type t, const Expr &a, const Expr &b);
+        void visit_glsl_op(SpvId glsl_op_code, Type t, const std::vector<Expr> &args);
+
+        void load_from_scalar_index(const Load *op, SpvId index_id, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class);
+        void load_from_vector_index(const Load *op, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class);
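+        // The store_at_* helpers below mirror the load_from_* paths above;
+        // both come in scalar-index and vector-index flavors, since SPIR-V
+        // buffer access goes through access chains rather than pointer
+        // arithmetic.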
+        void store_at_scalar_index(const Store *op, SpvId index_id, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class, SpvId value_id);
+        void store_at_vector_index(const Store *op, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class, SpvId value_id);
+
+        SpvFactory::Components split_vector(Type type, SpvId value_id);
+        SpvId join_vector(Type type, const SpvFactory::Components &value_components);
+        SpvId cast_type(Type target_type, Type value_type, SpvId value_id);
+        SpvId convert_to_bool(Type target_type, Type value_type, SpvId value_id);
+
+        // Returns Phi node inputs.
+        template<typename StmtOrExpr>
+        SpvFactory::BlockVariables emit_if_then_else(const Expr &condition, StmtOrExpr then_case, StmtOrExpr else_case);
+
+        template<typename T>
+        SpvId declare_constant_int(Type value_type, int64_t value);
+
+        template<typename T>
+        SpvId declare_constant_uint(Type value_type, uint64_t value);
+
+        template<typename T>
+        SpvId declare_constant_float(Type value_type, float value);
+
+        // Map from Halide built-in names to extended GLSL intrinsics for SPIR-V
+        using BuiltinMap = std::unordered_map<std::string, uint32_t>;
+        const BuiltinMap glsl_builtin = {
+            {"acos_f16", GLSLstd450Acos},
+            {"acos_f32", GLSLstd450Acos},
+            {"acosh_f16", GLSLstd450Acosh},
+            {"acosh_f32", GLSLstd450Acosh},
+            {"asin_f16", GLSLstd450Asin},
+            {"asin_f32", GLSLstd450Asin},
+            {"asinh_f16", GLSLstd450Asinh},
+            {"asinh_f32", GLSLstd450Asinh},
+            {"atan2_f16", GLSLstd450Atan2},
+            {"atan2_f32", GLSLstd450Atan2},
+            {"atan_f16", GLSLstd450Atan},
+            {"atan_f32", GLSLstd450Atan},
+            {"atanh_f16", GLSLstd450Atanh},
+            {"atanh_f32", GLSLstd450Atanh},
+            {"ceil_f16", GLSLstd450Ceil},
+            {"ceil_f32", GLSLstd450Ceil},
+            {"cos_f16", GLSLstd450Cos},
+            {"cos_f32", GLSLstd450Cos},
+            {"cosh_f16", GLSLstd450Cosh},
+            {"cosh_f32", GLSLstd450Cosh},
+            {"exp_f16", GLSLstd450Exp},
+            {"exp_f32", GLSLstd450Exp},
+            {"fast_inverse_sqrt_f16", GLSLstd450InverseSqrt},
+            {"fast_inverse_sqrt_f32", GLSLstd450InverseSqrt},
+            {"fast_log_f16", GLSLstd450Log},
+            {"fast_log_f32", GLSLstd450Log},
+            {"fast_exp_f16", GLSLstd450Exp},
+            {"fast_exp_f32", GLSLstd450Exp},
+            {"fast_pow_f16", GLSLstd450Pow},
+            {"fast_pow_f32", GLSLstd450Pow},
+            {"floor_f16", GLSLstd450Floor},
+            {"floor_f32", GLSLstd450Floor},
+            {"log_f16", GLSLstd450Log},
+            {"log_f32", GLSLstd450Log},
+            {"sin_f16", GLSLstd450Sin},
+            {"sin_f32", GLSLstd450Sin},
+            {"sinh_f16", GLSLstd450Sinh},
+            {"sinh_f32", GLSLstd450Sinh},
+            {"sqrt_f16", GLSLstd450Sqrt},
+            {"sqrt_f32", GLSLstd450Sqrt},
+            {"tan_f16", GLSLstd450Tan},
+            {"tan_f32", GLSLstd450Tan},
+            {"tanh_f16", GLSLstd450Tanh},
+            {"tanh_f32", GLSLstd450Tanh},
+            {"trunc_f16", GLSLstd450Trunc},
+            {"trunc_f32", GLSLstd450Trunc},
+            {"mix", GLSLstd450FMix},
+        };
+
+        // The SPIRV-IR builder
+        SpvBuilder builder;
+
+        // The scope contains both the symbol id and its storage class
+        using SymbolIdStorageClassPair = std::pair<SpvId, SpvStorageClass>;
+        using SymbolScope = Scope<SymbolIdStorageClassPair>;
+        using ScopedSymbolBinding = ScopedBinding<SymbolIdStorageClassPair>;
+        SymbolScope symbol_table;
+
+        // Map from a variable ID to its corresponding storage type definition
+        struct StorageAccess {
+            SpvStorageClass storage_class = SpvStorageClassMax;
+            uint32_t storage_array_size = 0;  // zero if not an array
+            SpvId storage_type_id = SpvInvalidId;
+            Type storage_type;
+        };
+        using StorageAccessMap = std::unordered_map<SpvId, StorageAccess>;
+        StorageAccessMap storage_access_map;
+
+        // Defines the binding information for a specialization constant
+        // that is exported by the module and can be overridden at runtime
+        struct SpecializationBinding {
+            SpvId constant_id = 0;
+            uint32_t type_size = 0;
+            std::string constant_name;
+        };
+        using SpecializationConstants = std::vector<SpecializationBinding>;
+
+        // Defines a shared memory allocation
+        struct SharedMemoryAllocation {
+            SpvId constant_id = 0;  // specialization constant to dynamically adjust array size (zero if not used)
+            uint32_t array_size = 0;
+            uint32_t type_size = 0;
+            std::string variable_name;
+        };
+        using SharedMemoryUsage = std::vector<SharedMemoryAllocation>;
+
+        // Defines the specialization constants used for dynamically overriding the dispatch size
+        struct WorkgroupSizeBinding {
+            SpvId local_size_constant_id[3] = {0, 0, 0};  // zero if unused
+        };
+
+        // Keep track of the descriptor sets so we can add a sidecar to the
+        // module indicating which descriptor set to use for each entry point
+        struct DescriptorSet {
+            std::string entry_point_name;
+            uint32_t uniform_buffer_count = 0;
+            uint32_t storage_buffer_count = 0;
+            SpecializationConstants specialization_constants;
+            SharedMemoryUsage shared_memory_usage;
+            WorkgroupSizeBinding workgroup_size_binding;
+        };
+        using DescriptorSetTable = std::vector<DescriptorSet>;
+        DescriptorSetTable descriptor_set_table;
+
+        // The workgroup size: the extents of the 1-3 dimensional index space
+        // used as part of the kernel dispatch. It can also be used to adjust
+        // the layout for work items (aka GPU threads), based on logical
+        // groupings. If a zero-sized workgroup is encountered during CodeGen,
+        // the extents are assumed to be dynamic and specified at runtime.
+        uint32_t workgroup_size[3];
+
+        // Current index of kernel for module
+        uint32_t kernel_index = 0;
+
+        // Target for codegen
+        Target target;
+
+    } emitter;
+
+    std::string current_kernel_name;
+};
+
+// Check if all loads and stores to the member 'buffer' are dense, aligned, and
+// have the same number of lanes. If this is indeed the case then the 'lanes'
+// member stores the number of lanes in those loads and stores.
+//
+// FIXME: Refactor this and the version in CodeGen_OpenGLCompute_Dev to a common place!
+//
+class CheckAlignedDenseVectorLoadStore : public IRVisitor {
+public:
+    // True if all loads and stores from the buffer are dense, aligned, and all
+    // have the same number of lanes, false otherwise.
+    bool are_all_dense = true;
+
+    // The number of lanes in the loads and stores. If the number of lanes is
+    // variable, then are_all_dense is set to false regardless, and this value
+    // is undefined. Initially set to -1 before any dense operation is
+    // discovered.
+    int lanes = -1;
+
+    CheckAlignedDenseVectorLoadStore(std::string name)
+        : buffer_name(std::move(name)) {
+    }
+
+private:
+    // The name of the buffer to check.
+    std::string buffer_name;
+
+    using IRVisitor::visit;
+
+    void visit(const Load *op) override {
+        IRVisitor::visit(op);
+
+        if (op->name != buffer_name) {
+            return;
+        }
+
+        if (op->type.is_scalar()) {
+            are_all_dense = false;
+            return;
+        }
+
+        Expr ramp_base = strided_ramp_base(op->index);
+        if (!ramp_base.defined()) {
+            are_all_dense = false;
+            return;
+        }
+
+        if ((op->alignment.modulus % op->type.lanes() != 0) ||
+            (op->alignment.remainder % op->type.lanes() != 0)) {
+            are_all_dense = false;
+            return;
+        }
+
+        if (lanes != -1 && op->type.lanes() != lanes) {
+            are_all_dense = false;
+            return;
+        }
+
+        lanes = op->type.lanes();
+    }
+
+    void visit(const Store *op) override {
+        IRVisitor::visit(op);
+
+        if (op->name != buffer_name) {
+            return;
+        }
+
+        if (op->value.type().is_scalar()) {
+            are_all_dense = false;
+            return;
+        }
+
+        Expr ramp_base = strided_ramp_base(op->index);
+        if (!ramp_base.defined()) {
+            are_all_dense = false;
+            return;
+        }
+
+        if ((op->alignment.modulus % op->value.type().lanes() != 0) ||
+            (op->alignment.remainder % op->value.type().lanes() != 0)) {
+            are_all_dense = false;
+            return;
+        }
+
+        if (lanes != -1 && op->value.type().lanes() != lanes) {
+            are_all_dense = false;
+            return;
+        }
+
+        lanes = op->value.type().lanes();
+    }
+};
+
+struct FindWorkGroupSize : public IRVisitor {
+    using IRVisitor::visit;
+    void visit(const For *loop) override {
+        if (!CodeGen_GPU_Dev::is_gpu_var(loop->name)) {
+            return loop->body.accept(this);
+        }
+
+        if ((loop->for_type == ForType::GPUBlock) ||
+            (loop->for_type == ForType::GPUThread)) {
+
+            // This should always be true at this point in codegen
+            internal_assert(is_const_zero(loop->min));
+
+            // Save & validate the workgroup size
+            int index = thread_loop_workgroup_index(loop->name);
+            if (index >= 0) {
+                const IntImm *literal = loop->extent.as<IntImm>();
+                if (literal != nullptr) {
+                    uint32_t new_wg_size = literal->value;
+                    user_assert(workgroup_size[index] == 0 || workgroup_size[index] == new_wg_size)
+                        << "Vulkan requires all kernels have the same workgroup size, "
+                        << "but two different sizes were encountered: "
+                        << workgroup_size[index] << " and "
+                        << new_wg_size << " in dimension " << index << "\n";
+                    workgroup_size[index] = new_wg_size;
+                }
+            }
+            debug(4) << "Thread group size for index " << index << " is " << workgroup_size[index] << "\n";
+        }
+        loop->body.accept(this);
+    }
+
+    int thread_loop_workgroup_index(const std::string &name) {
+        std::string ids[] = {".__thread_id_x",
+                             ".__thread_id_y",
+                             ".__thread_id_z"};
+        for (size_t i = 0; i < sizeof(ids) / sizeof(std::string); i++) {
+            if (ends_with(name, ids[i])) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    uint32_t workgroup_size[3] = {0, 0, 0};
+};
+
+CodeGen_Vulkan_Dev::SPIRV_Emitter::SPIRV_Emitter(Target t)
+    : IRVisitor(), target(t) {
+    // Empty
+}
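+// Scalarize: evaluate a vector expression one lane at a time, inserting each
+// lane's value into an (initially null) vector constant via CompositeInsert.
+// This is the fallback path used when an op has no usable vector form.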
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::scalarize(const Expr &e) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::scalarize(): " << (Expr)e << "\n";
+    internal_assert(e.type().is_vector()) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::scalarize must be called with an expression of vector type.\n";
+
+    SpvId type_id = builder.declare_type(e.type());
+    SpvId value_id = builder.declare_null_constant(e.type());
+    SpvId result_id = value_id;
+    for (int i = 0; i < e.type().lanes(); i++) {
+        extract_lane(e, i).accept(this);
+        SpvId extracted_id = builder.current_id();
+        SpvId composite_id = builder.reserve_id(SpvResultId);
+        SpvFactory::Indices indices = {(uint32_t)i};
+        // Insert into the running result so each lane accumulates, rather
+        // than inserting into the original null constant every iteration.
+        builder.append(SpvFactory::composite_insert(type_id, composite_id, extracted_id, result_id, indices));
+        result_id = composite_id;
+    }
+    builder.update_id(result_id);
+}
+
+SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::map_type_to_pair(const Type &t) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::map_type_to_pair(): " << t << "\n";
+    SpvId base_type_id = builder.declare_type(t);
+    SpvBuilder::StructMemberTypes member_type_ids = {base_type_id, base_type_id};
+    const std::string struct_name = std::string("_struct_") + type_to_c_type(t, false, false) + std::string("_pair");
+    SpvId struct_type_id = builder.declare_struct(struct_name, member_type_ids);
+    return struct_type_id;
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Variable *var) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Variable): " << var->type << " " << var->name << "\n";
+    SpvId variable_id = symbol_table.get(var->name).first;
+    user_assert(variable_id != SpvInvalidId) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Variable): Invalid symbol name!\n";
+    builder.update_id(variable_id);
+}
+
+template<typename T>
+SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_constant_int(Type value_type, int64_t value) {
+    const T typed_value = (T)(value);
+    SpvId constant_id = builder.declare_constant(value_type, &typed_value);
+    builder.update_id(constant_id);
+    return constant_id;
+}
+
+template<typename T>
+SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_constant_uint(Type value_type, uint64_t value) {
+    const T typed_value = (T)(value);
+    SpvId constant_id = builder.declare_constant(value_type, &typed_value);
+    builder.update_id(constant_id);
+    return constant_id;
+}
+
+template<typename T>
+SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_constant_float(Type value_type, float value) {
+    const T typed_value = (T)(value);
+    SpvId constant_id = builder.declare_constant(value_type, &typed_value);
+    builder.update_id(constant_id);
+    return constant_id;
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const IntImm *imm) {
+    if (imm->type.bits() == 8) {
+        declare_constant_int<int8_t>(imm->type, imm->value);
+    } else if (imm->type.bits() == 16) {
+        declare_constant_int<int16_t>(imm->type, imm->value);
+    } else if (imm->type.bits() == 32) {
+        declare_constant_int<int32_t>(imm->type, imm->value);
+    } else if (imm->type.bits() == 64) {
+        declare_constant_int<int64_t>(imm->type, imm->value);
+    } else {
+        internal_error << "Vulkan backend currently only supports 8-bit, 16-bit, 32-bit or 64-bit signed integers!\n";
+    }
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const UIntImm *imm) {
+    if (imm->type.bits() == 8) {
+        declare_constant_uint<uint8_t>(imm->type, imm->value);
+    } else if (imm->type.bits() == 16) {
+        declare_constant_uint<uint16_t>(imm->type, imm->value);
+    } else if (imm->type.bits() == 32) {
+        declare_constant_uint<uint32_t>(imm->type, imm->value);
+    } else if (imm->type.bits() == 64) {
+        declare_constant_uint<uint64_t>(imm->type, imm->value);
+    } else {
+        internal_error << "Vulkan backend currently only supports 8-bit, 16-bit, 32-bit or 64-bit unsigned integers!\n";
+    }
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const StringImm *imm) {
+    SpvId constant_id = builder.declare_string_constant(imm->value);
+    builder.update_id(constant_id);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const FloatImm *imm) {
+    if (imm->type.bits() == 16) {
+        if (imm->type.is_bfloat()) {
+            declare_constant_float<bfloat16_t>(imm->type, imm->value);
+        } else {
+            declare_constant_float<float16_t>(imm->type, imm->value);
+        }
+    } else if (imm->type.bits() == 32) {
+        declare_constant_float<float>(imm->type, imm->value);
+    } else if (imm->type.bits() == 64) {
+        declare_constant_float<double>(imm->type, imm->value);
+    } else {
+        internal_error << "Vulkan backend currently only supports 16-bit, 32-bit or 64-bit floats\n";
+    }
+}
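+// Helper used by convert_to_bool below: fills a raw byte buffer with `count`
+// copies of `value`, reinterpreted as type T, to build the typed true/false
+// constant payloads for each lane.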
+template<typename T>
+void fill_bytes_with_value(uint8_t *bytes, int count, int value) {
+    T *v = reinterpret_cast<T *>(bytes);
+    for (int i = 0; i < count; ++i) {
+        v[i] = (T)value;
+    }
+}
+
+SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::convert_to_bool(Type target_type, Type value_type, SpvId value_id) {
+    if (!value_type.is_bool()) {
+        value_id = cast_type(Bool(), value_type, value_id);
+    }
+
+    const int true_value = 1;
+    const int false_value = 0;
+
+    std::vector<uint8_t> true_data(target_type.bytes(), (uint8_t)0);
+    std::vector<uint8_t> false_data(target_type.bytes(), (uint8_t)0);
+
+    if (target_type.is_int_or_uint() && target_type.bits() == 8) {
+        fill_bytes_with_value<int8_t>(&true_data[0], target_type.lanes(), true_value);
+        fill_bytes_with_value<int8_t>(&false_data[0], target_type.lanes(), false_value);
+    } else if (target_type.is_int_or_uint() && target_type.bits() == 16) {
+        fill_bytes_with_value<int16_t>(&true_data[0], target_type.lanes(), true_value);
+        fill_bytes_with_value<int16_t>(&false_data[0], target_type.lanes(), false_value);
+    } else if (target_type.is_int_or_uint() && target_type.bits() == 32) {
+        fill_bytes_with_value<int32_t>(&true_data[0], target_type.lanes(), true_value);
+        fill_bytes_with_value<int32_t>(&false_data[0], target_type.lanes(), false_value);
+    } else if (target_type.is_int_or_uint() && target_type.bits() == 64) {
+        fill_bytes_with_value<int64_t>(&true_data[0], target_type.lanes(), true_value);
+        fill_bytes_with_value<int64_t>(&false_data[0], target_type.lanes(), false_value);
+    } else if (target_type.is_float() && target_type.bits() == 16) {
+        if (target_type.is_bfloat()) {
+            fill_bytes_with_value<bfloat16_t>(&true_data[0], target_type.lanes(), true_value);
+            fill_bytes_with_value<bfloat16_t>(&false_data[0], target_type.lanes(), false_value);
+        } else {
+            fill_bytes_with_value<float16_t>(&true_data[0], target_type.lanes(), true_value);
+            fill_bytes_with_value<float16_t>(&false_data[0], target_type.lanes(), false_value);
+        }
+    } else if (target_type.is_float() && target_type.bits() == 32) {
+        fill_bytes_with_value<float>(&true_data[0], target_type.lanes(), true_value);
+        fill_bytes_with_value<float>(&false_data[0], target_type.lanes(), false_value);
+    } else if (target_type.is_float() && target_type.bits() == 64) {
+        fill_bytes_with_value<double>(&true_data[0], target_type.lanes(), true_value);
+        fill_bytes_with_value<double>(&false_data[0], target_type.lanes(), false_value);
+    } else {
+        user_error << "Unhandled type cast from value type '" << value_type << "' to target type '" << target_type << "'!";
+    }
+
+    SpvId result_id = builder.reserve_id(SpvResultId);
+    SpvId target_type_id = builder.declare_type(target_type);
+    SpvId true_value_id = builder.declare_constant(target_type, &true_data[0]);
+    SpvId false_value_id = builder.declare_constant(target_type, &false_data[0]);
+    builder.append(SpvFactory::select(target_type_id, result_id, value_id, true_value_id, false_value_id));
+    return result_id;
+}
+
+SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::cast_type(Type target_type, Type value_type, SpvId value_id) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::cast_type(): casting from value type '"
+             << value_type << "' to target type '" << target_type << "'!\n";
+
+    if (value_type == target_type) {
+        return value_id;
+    }
+
+    SpvOp op_code = SpvOpNop;
+    if (value_type.is_float()) {
+        if (target_type.is_float()) {
+            op_code = SpvOpFConvert;
+        } else if (target_type.is_bool()) {
+            op_code = SpvOpSelect;
+        } else if (target_type.is_uint()) {
+            op_code = SpvOpConvertFToU;
+        } else if (target_type.is_int()) {
+            op_code = SpvOpConvertFToS;
+        }
+    }
else if (value_type.is_bool()) { + op_code = SpvOpSelect; + } else if (value_type.is_uint()) { + if (target_type.is_float()) { + op_code = SpvOpConvertUToF; + } else if (target_type.is_bool()) { + op_code = SpvOpSelect; + } else if (target_type.is_int_or_uint()) { + op_code = SpvOpUConvert; + } + } else if (value_type.is_int()) { + if (target_type.is_float()) { + op_code = SpvOpConvertSToF; + } else if (target_type.is_bool()) { + op_code = SpvOpSelect; + } else if (target_type.is_int_or_uint()) { + op_code = SpvOpSConvert; + } + } + + // If none of the explicit conversions matched, do a direct bitcast if the total + // size of both types is the same + if (op_code == SpvOpNop) { + if (target_type.bytes() == value_type.bytes()) { + op_code = SpvOpBitcast; + } + } + + // Error If we still didn't find a suitable cast ... + if (op_code == SpvOpNop) { + user_error << "Unhandled type cast from value type '" << value_type << "' to target type '" << target_type << "'!"; + return SpvInvalidId; + } + + SpvId result_id = SpvInvalidId; + SpvId target_type_id = builder.declare_type(target_type); + if (op_code == SpvOpBitcast) { + result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::bitcast(target_type_id, result_id, value_id)); + } else if (op_code == SpvOpSelect) { + result_id = convert_to_bool(target_type, value_type, value_id); + } else if (op_code == SpvOpUConvert && target_type.is_int()) { + // SPIR-V requires both value and target types to be unsigned and of + // different component bit widths in order to be compatible with UConvert + // ... so do the conversion to an equivalent unsigned type then bitcast this + // result into the target type + Type unsigned_type = target_type.with_code(halide_type_uint); + if (unsigned_type.bytes() != value_type.bytes()) { + SpvId unsigned_type_id = builder.declare_type(unsigned_type); + SpvId unsigned_value_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::convert(op_code, unsigned_type_id, unsigned_value_id, value_id)); + value_id = unsigned_value_id; + } + result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::bitcast(target_type_id, result_id, value_id)); + } else if (op_code == SpvOpSConvert && target_type.is_uint()) { + // Same as above but for SConvert + Type signed_type = target_type.with_code(halide_type_int); + if (signed_type.bytes() != value_type.bytes()) { + SpvId signed_type_id = builder.declare_type(signed_type); + SpvId signed_value_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::convert(op_code, signed_type_id, signed_value_id, value_id)); + value_id = signed_value_id; + } + result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::bitcast(target_type_id, result_id, value_id)); + } else { + result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::convert(op_code, target_type_id, result_id, value_id)); + } + return result_id; +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Cast *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Cast): " << op->value.type() << " to " << op->type << "\n"; + + Type value_type = op->value.type(); + Type target_type = op->type; + + op->value.accept(this); + SpvId value_id = builder.current_id(); + + if ((value_type.is_vector() && target_type.is_vector())) { + if (value_type.lanes() == target_type.lanes()) { + SpvId result_id = cast_type(target_type, value_type, value_id); + builder.update_id(result_id); + } else { + user_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Cast): unhandled 
case " << op->value.type() << " to " << op->type << " (incompatible lanes)\n"; + } + } else if (value_type.is_scalar() && target_type.is_scalar()) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Cast): scalar type (cast)\n"; + SpvId result_id = cast_type(target_type, value_type, value_id); + builder.update_id(result_id); + } else if (value_type.bytes() == target_type.bytes()) { + SpvId result_id = cast_type(target_type, value_type, value_id); + builder.update_id(result_id); + } else { + user_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Cast): unhandled case " << op->value.type() << " to " << op->type << "\n"; + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Reinterpret *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Reinterpret): " << op->value.type() << " to " << op->type << "\n"; + SpvId type_id = builder.declare_type(op->type); + op->value.accept(this); + SpvId src_id = builder.current_id(); + SpvId result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::bitcast(type_id, result_id, src_id)); + builder.update_id(result_id); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Add *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Add): " << op->type << " ((" << op->a << ") + (" << op->b << "))\n"; + visit_binary_op(op->type.is_float() ? SpvOpFAdd : SpvOpIAdd, op->type, op->a, op->b); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Sub *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Sub): " << op->type << " ((" << op->a << ") - (" << op->b << "))\n"; + visit_binary_op(op->type.is_float() ? SpvOpFSub : SpvOpISub, op->type, op->a, op->b); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Mul *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Mul): " << op->type << " ((" << op->a << ") * (" << op->b << "))\n"; + visit_binary_op(op->type.is_float() ? 
SpvOpFMul : SpvOpIMul, op->type, op->a, op->b); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Div *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Div): " << op->type << " ((" << op->a << ") / (" << op->b << "))\n"; + user_assert(!is_const_zero(op->b)) << "Division by constant zero in expression: " << Expr(op) << "\n"; + if (op->type.is_int()) { + Expr e = lower_euclidean_div(op->a, op->b); + e.accept(this); + } else if (op->type.is_uint()) { + visit_binary_op(SpvOpUDiv, op->type, op->a, op->b); + } else if (op->type.is_float()) { + visit_binary_op(SpvOpFDiv, op->type, op->a, op->b); + } else { + internal_error << "Failed to find a suitable Div operator for type: " << op->type << "\n"; + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Mod *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Mod): " << op->type << " ((" << op->a << ") % (" << op->b << "))\n"; + int bits = 0; + if (is_const_power_of_two_integer(op->b, &bits) && op->type.is_int_or_uint()) { + op->a.accept(this); + SpvId src_a_id = builder.current_id(); + + int bitwise_value = ((1 << bits) - 1); + Expr expr = make_const(op->type, bitwise_value); + expr.accept(this); + SpvId src_b_id = builder.current_id(); + + SpvId type_id = builder.declare_type(op->type); + SpvId result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::binary_op(SpvOpBitwiseAnd, type_id, result_id, src_a_id, src_b_id)); + builder.update_id(result_id); + } else if (op->type.is_int() || op->type.is_uint()) { + // Just exploit the Euclidean identity + Expr zero = make_zero(op->type); + Expr equiv = select(op->a == zero, zero, + op->a - (op->a / op->b) * op->b); + equiv = common_subexpression_elimination(equiv); + equiv.accept(this); + } else if (op->type.is_float()) { + // SPIR-V FMod is strangely not what we want .. 
FRem does what we need + visit_binary_op(SpvOpFRem, op->type, op->a, op->b); + } else { + internal_error << "Failed to find a suitable Mod operator for type: " << op->type << "\n"; + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Max *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Max): " << op->type << " Max((" << op->a << "), (" << op->b << "))\n"; + SpvId op_code = SpvOpNop; + if (op->type.is_float()) { + op_code = GLSLstd450FMax; + } else if (op->type.is_int()) { + op_code = GLSLstd450SMax; + } else if (op->type.is_uint()) { + op_code = GLSLstd450UMax; + } else { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Max *op): unhandled type: " << op->type << "\n"; + } + + std::vector args; + args.reserve(2); + if (op->type.is_vector()) { + if (op->a.type().is_scalar()) { + Expr a_vector = Broadcast::make(op->a, op->type.lanes()); + args.push_back(a_vector); + } else { + args.push_back(op->a); + } + if (op->b.type().is_scalar()) { + Expr b_vector = Broadcast::make(op->b, op->type.lanes()); + args.push_back(b_vector); + } else { + args.push_back(op->b); + } + } else { + args.push_back(op->a); + args.push_back(op->b); + } + visit_glsl_op(op_code, op->type, args); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Min *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Min): " << op->type << " Min((" << op->a << "), (" << op->b << "))\n"; + SpvId op_code = SpvOpNop; + if (op->type.is_float()) { + op_code = GLSLstd450FMin; + } else if (op->type.is_int()) { + op_code = GLSLstd450SMin; + } else if (op->type.is_uint()) { + op_code = GLSLstd450UMin; + } else { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Min *op): unhandled type: " << op->type << "\n"; + } + + std::vector args; + args.reserve(2); + if (op->type.is_vector()) { + if (op->a.type().is_scalar()) { + Expr a_vector = Broadcast::make(op->a, op->type.lanes()); + args.push_back(a_vector); + } else { + args.push_back(op->a); + } + if (op->b.type().is_scalar()) { + Expr b_vector = Broadcast::make(op->b, op->type.lanes()); + args.push_back(b_vector); + } else { + args.push_back(op->b); + } + } else { + args.push_back(op->a); + args.push_back(op->b); + } + visit_glsl_op(op_code, op->type, args); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const EQ *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(EQ): " << op->type << " (" << op->a << ") == (" << op->b << ")\n"; + if (op->a.type() != op->b.type()) { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const EQ *op): Mismatched operand types: " << op->a.type() << " != " << op->b.type() << "\n"; + } + SpvOp op_code = SpvOpNop; + if (op->a.type().is_float()) { + op_code = SpvOpFOrdEqual; + } else { + op_code = SpvOpIEqual; + } + Type bool_type = UInt(1, op->type.lanes()); + visit_binary_op(op_code, bool_type, op->a, op->b); + if (!op->type.is_bool()) { + SpvId current_id = builder.current_id(); + SpvId result_id = cast_type(op->type, bool_type, current_id); + builder.update_id(result_id); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const NE *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(NE): " << op->type << " (" << op->a << ") != (" << op->b << ")\n"; + if (op->a.type() != op->b.type()) { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const NE *op): Mismatched operand types: " << op->a.type() << " != " << op->b.type() << "\n"; + } + SpvOp op_code = SpvOpNop; + if (op->a.type().is_float()) { + op_code = SpvOpFOrdNotEqual; + } else { + 
op_code = SpvOpINotEqual; + } + Type bool_type = UInt(1, op->type.lanes()); + visit_binary_op(op_code, bool_type, op->a, op->b); + if (!op->type.is_bool()) { + Type bool_type = UInt(1, op->type.lanes()); + SpvId current_id = builder.current_id(); + SpvId result_id = cast_type(op->type, bool_type, current_id); + builder.update_id(result_id); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const LT *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(LT): " << op->type << " (" << op->a << ") < (" << op->b << ")\n"; + if (op->a.type() != op->b.type()) { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const LT *op): Mismatched operand types: " << op->a.type() << " != " << op->b.type() << "\n"; + } + SpvOp op_code = SpvOpNop; + if (op->a.type().is_float()) { + op_code = SpvOpFOrdLessThan; + } else if (op->a.type().is_int()) { + op_code = SpvOpSLessThan; + } else if (op->a.type().is_uint()) { + op_code = SpvOpULessThan; + } else { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const LT *op): unhandled type: " << op->a.type() << "\n"; + } + Type bool_type = UInt(1, op->type.lanes()); + visit_binary_op(op_code, bool_type, op->a, op->b); + if (!op->type.is_bool()) { + Type bool_type = UInt(1, op->type.lanes()); + SpvId current_id = builder.current_id(); + SpvId result_id = cast_type(op->type, bool_type, current_id); + builder.update_id(result_id); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const LE *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(LE): " << op->type << " (" << op->a << ") <= (" << op->b << ")\n"; + if (op->a.type() != op->b.type()) { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const LE *op): Mismatched operand types: " << op->a.type() << " != " << op->b.type() << "\n"; + } + SpvOp op_code = SpvOpNop; + if (op->a.type().is_float()) { + op_code = SpvOpFOrdLessThanEqual; + } else if (op->a.type().is_int()) { + op_code = SpvOpSLessThanEqual; + } else if (op->a.type().is_uint()) { + op_code = SpvOpULessThanEqual; + } else { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const LE *op): unhandled type: " << op->a.type() << "\n"; + } + Type bool_type = UInt(1, op->type.lanes()); + visit_binary_op(op_code, bool_type, op->a, op->b); + if (!op->type.is_bool()) { + Type bool_type = UInt(1, op->type.lanes()); + SpvId current_id = builder.current_id(); + SpvId result_id = cast_type(op->type, bool_type, current_id); + builder.update_id(result_id); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const GT *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(GT): " << op->type << " (" << op->a << ") > (" << op->b << ")\n"; + if (op->a.type() != op->b.type()) { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const GT *op): Mismatched operand types: " << op->a.type() << " != " << op->b.type() << "\n"; + } + SpvOp op_code = SpvOpNop; + if (op->a.type().is_float()) { + op_code = SpvOpFOrdGreaterThan; + } else if (op->a.type().is_int()) { + op_code = SpvOpSGreaterThan; + } else if (op->a.type().is_uint()) { + op_code = SpvOpUGreaterThan; + } else { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const GT *op): unhandled type: " << op->a.type() << "\n"; + } + Type bool_type = UInt(1, op->type.lanes()); + visit_binary_op(op_code, bool_type, op->a, op->b); + if (!op->type.is_bool()) { + Type bool_type = UInt(1, op->type.lanes()); + SpvId current_id = builder.current_id(); + SpvId result_id = cast_type(op->type, bool_type, current_id); + 
+        builder.update_id(result_id);
+    }
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const GE *op) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(GE): " << op->type << " (" << op->a << ") >= (" << op->b << ")\n";
+    if (op->a.type() != op->b.type()) {
+        internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const GE *op): Mismatched operand types: " << op->a.type() << " != " << op->b.type() << "\n";
+    }
+    SpvOp op_code = SpvOpNop;
+    if (op->a.type().is_float()) {
+        op_code = SpvOpFOrdGreaterThanEqual;
+    } else if (op->a.type().is_int()) {
+        op_code = SpvOpSGreaterThanEqual;
+    } else if (op->a.type().is_uint()) {
+        op_code = SpvOpUGreaterThanEqual;
+    } else {
+        internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const GE *op): unhandled type: " << op->a.type() << "\n";
+    }
+    Type bool_type = UInt(1, op->type.lanes());
+    visit_binary_op(op_code, bool_type, op->a, op->b);
+    if (!op->type.is_bool()) {
+        Type bool_type = UInt(1, op->type.lanes());
+        SpvId current_id = builder.current_id();
+        SpvId result_id = cast_type(op->type, bool_type, current_id);
+        builder.update_id(result_id);
+    }
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const And *op) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(And): " << op->type << " (" << op->a << ") && (" << op->b << ")\n";
+    visit_binary_op(SpvOpLogicalAnd, op->type, op->a, op->b);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Or *op) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Or): " << op->type << " (" << op->a << ") || (" << op->b << ")\n";
+    visit_binary_op(SpvOpLogicalOr, op->type, op->a, op->b);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Not *op) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Not): " << op->type << " !(" << op->a << ")\n";
+    visit_unary_op(SpvOpLogicalNot, op->type, op->a);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const ProducerConsumer *op) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(ProducerConsumer): name=" << op->name << " is_producer=" << (op->is_producer ? "true" : "false") << "\n";
+    op->body.accept(this);
+}
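+// visit(Call) dispatches in three tiers: Halide intrinsics with a direct
+// SPIR-V core opcode are emitted inline; intrinsics without one are lowered
+// to simpler IR and re-visited; anything left over is looked up in the
+// glsl_builtin table and emitted as a GLSL.std.450 extended instruction.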
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Call *op) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Call): " << op->type << " " << op->name << " args=" << (uint32_t)op->args.size() << "\n";
+
+    if (op->is_intrinsic(Call::gpu_thread_barrier)) {
+        internal_assert(op->args.size() == 1) << "gpu_thread_barrier() intrinsic must specify memory fence type.\n";
+
+        const auto *fence_type_ptr = as_const_int(op->args[0]);
+        internal_assert(fence_type_ptr) << "gpu_thread_barrier() parameter is not a constant integer.\n";
+        auto fence_type = *fence_type_ptr;
+
+        // Follow GLSL semantics for GLCompute ...
+        //
+        // barrier() -> control_barrier(Workgroup, Workgroup, AcquireRelease | WorkgroupMemory)
+        //
+        uint32_t execution_scope = SpvWorkgroupScope;
+        uint32_t memory_scope = SpvWorkgroupScope;
+        uint32_t control_mask = (SpvMemorySemanticsAcquireReleaseMask |
+                                 SpvMemorySemanticsWorkgroupMemoryMask);
+        SpvId exec_scope_id = builder.declare_constant(UInt(32), &execution_scope);
+        SpvId memory_scope_id = builder.declare_constant(UInt(32), &memory_scope);
+        SpvId control_mask_id = builder.declare_constant(UInt(32), &control_mask);
+        builder.append(SpvFactory::control_barrier(exec_scope_id, memory_scope_id, control_mask_id));
+
+        if ((fence_type & CodeGen_GPU_Dev::MemoryFenceType::Device) ||
+            (fence_type & CodeGen_GPU_Dev::MemoryFenceType::Shared)) {
+
+            // groupMemoryBarrier() -> memory_barrier(Workgroup, AcquireRelease | UniformMemory | WorkgroupMemory | ImageMemory)
+            //
+            uint32_t memory_mask = (SpvMemorySemanticsAcquireReleaseMask |
+                                    SpvMemorySemanticsUniformMemoryMask |
+                                    SpvMemorySemanticsWorkgroupMemoryMask |
+                                    SpvMemorySemanticsImageMemoryMask);
+            SpvId memory_mask_id = builder.declare_constant(UInt(32), &memory_mask);
+            builder.append(SpvFactory::memory_barrier(memory_scope_id, memory_mask_id));
+        }
+        SpvId result_id = builder.declare_null_constant(op->type);
+        builder.update_id(result_id);
+
+    } else if (op->is_intrinsic(Call::abs)) {
+        internal_assert(op->args.size() == 1);
+
+        SpvId op_code = SpvInvalidId;
+        if (op->type.is_float()) {
+            op_code = GLSLstd450FAbs;
+        } else {
+            op_code = GLSLstd450SAbs;
+        }
+        visit_glsl_op(op_code, op->type, op->args);
+
+    } else if (op->is_intrinsic(Call::IntrinsicOp::round)) {
+        internal_assert(op->args.size() == 1);
+
+        // GLSL RoundEven matches Halide's implementation
+        visit_glsl_op(GLSLstd450RoundEven, op->type, op->args);
+
+    } else if (op->is_intrinsic(Call::absd)) {
+        internal_assert(op->args.size() == 2);
+        Expr a = op->args[0];
+        Expr b = op->args[1];
+        Expr e = cast(op->type, select(a < b, b - a, a - b));
+        e.accept(this);
+
+    } else if (op->is_intrinsic(Call::return_second)) {
+        internal_assert(op->args.size() == 2);
+        // Simply discard the first argument, which is generally a call to
+        // 'halide_printf'.
+        if (op->args[1].defined()) {
+            op->args[1].accept(this);
+        }
+    } else if (op->is_intrinsic(Call::bitwise_and)) {
+        internal_assert(op->args.size() == 2);
+        visit_binary_op(SpvOpBitwiseAnd, op->type, op->args[0], op->args[1]);
+    } else if (op->is_intrinsic(Call::bitwise_xor)) {
+        internal_assert(op->args.size() == 2);
+        visit_binary_op(SpvOpBitwiseXor, op->type, op->args[0], op->args[1]);
+    } else if (op->is_intrinsic(Call::bitwise_or)) {
+        internal_assert(op->args.size() == 2);
+        visit_binary_op(SpvOpBitwiseOr, op->type, op->args[0], op->args[1]);
+    } else if (op->is_intrinsic(Call::bitwise_not)) {
+        internal_assert(op->args.size() == 1);
+        visit_unary_op(SpvOpNot, op->type, op->args[0]);
+    } else if (op->is_intrinsic(Call::if_then_else)) {
+        Expr cond = op->args[0];
+        if (const Broadcast *b = cond.as<Broadcast>()) {
+            cond = b->value;
+        }
+        if (cond.type().is_vector()) {
+            scalarize(op);
+        } else {
+            // Generate a Phi node if used as an expression.
+            internal_assert(op->args.size() == 2 || op->args.size() == 3);
+            Expr else_expr;
+            if (op->args.size() == 3) {
+                else_expr = op->args[2];
+            }
+            SpvFactory::BlockVariables block_vars = emit_if_then_else(op->args[0], op->args[1], else_expr);
+            SpvId type_id = builder.declare_type(op->type);
+            SpvId result_id = builder.reserve_id(SpvResultId);
+            builder.append(SpvFactory::phi(type_id, result_id, block_vars));
+            builder.update_id(result_id);
+        }
+    } else if (op->is_intrinsic(Call::IntrinsicOp::div_round_to_zero)) {
+        internal_assert(op->args.size() == 2);
+        // See if we can rewrite it to something faster (e.g. a shift)
+        Expr e = lower_int_uint_div(op->args[0], op->args[1], /** round to zero */ true);
+        if (!e.as<Call>()) {
+            e.accept(this);
+            return;
+        }
+
+        SpvOp op_code = SpvOpNop;
+        if (op->type.is_float()) {
+            op_code = SpvOpFDiv;
+        } else if (op->type.is_int()) {
+            op_code = SpvOpSDiv;
+        } else if (op->type.is_uint()) {
+            op_code = SpvOpUDiv;
+        } else {
+            internal_error << "div_round_to_zero of unhandled type.\n";
+        }
+        visit_binary_op(op_code, op->type, op->args[0], op->args[1]);
+    } else if (op->is_intrinsic(Call::IntrinsicOp::mod_round_to_zero)) {
+        internal_assert(op->args.size() == 2);
+        SpvOp op_code = SpvOpNop;
+        if (op->type.is_float()) {
+            op_code = SpvOpFRem;  // NOTE: FRem matches the fmod we expect
+        } else if (op->type.is_int()) {
+            op_code = SpvOpSMod;
+        } else if (op->type.is_uint()) {
+            op_code = SpvOpUMod;
+        } else {
+            internal_error << "mod_round_to_zero of unhandled type.\n";
+        }
+        visit_binary_op(op_code, op->type, op->args[0], op->args[1]);
+
+    } else if (op->is_intrinsic(Call::shift_right)) {
+        internal_assert(op->args.size() == 2);
+        if (op->type.is_uint() || (op->args[1].type().is_uint())) {
+            visit_binary_op(SpvOpShiftRightLogical, op->type, op->args[0], op->args[1]);
+        } else {
+            Expr e = lower_signed_shift_right(op->args[0], op->args[1]);
+            e.accept(this);
+        }
+    } else if (op->is_intrinsic(Call::shift_left)) {
+        internal_assert(op->args.size() == 2);
+        if (op->type.is_uint() || (op->args[1].type().is_uint())) {
+            visit_binary_op(SpvOpShiftLeftLogical, op->type, op->args[0], op->args[1]);
+        } else {
+            Expr e = lower_signed_shift_left(op->args[0], op->args[1]);
+            e.accept(this);
+        }
+    } else if (op->is_intrinsic(Call::strict_float)) {
+        // TODO: Enable/Disable RelaxedPrecision flags?
+        internal_assert(op->args.size() == 1);
+        op->args[0].accept(this);
+    } else if (op->is_intrinsic(Call::IntrinsicOp::sorted_avg)) {
+        internal_assert(op->args.size() == 2);
+        // b > a, so the following works without widening:
+        // a + (b - a)/2
+        Expr e = op->args[0] + (op->args[1] - op->args[0]) / 2;
+        e.accept(this);
+    } else if (op->is_intrinsic(Call::lerp)) {
+
+        // Implement lerp using GLSL's mix() function, which always uses
+        // floating point arithmetic.
+        Expr zero_val = op->args[0];
+        Expr one_val = op->args[1];
+        Expr weight = op->args[2];
+
+        internal_assert(weight.type().is_uint() || weight.type().is_float());
+        if (weight.type().is_uint()) {
+            // Normalize integer weights to [0.0f, 1.0f] range.
+            internal_assert(weight.type().bits() < 32);
+            weight = Div::make(Cast::make(Float(32), weight),
+                               Cast::make(Float(32), weight.type().max()));
+        } else if (op->type.is_uint()) {
+            // Round float weights down to next multiple of (1/op->type.imax())
+            // to give same results as lerp based on integer arithmetic.
+ internal_assert(op->type.bits() < 32); + weight = floor(weight * op->type.max()) / op->type.max(); + } + + Type result_type = Float(32, op->type.lanes()); + Expr e = Call::make(result_type, "mix", {zero_val, one_val, weight}, Call::Extern); + + if (!op->type.is_float()) { + // Mirror rounding implementation of Halide's integer lerp. + e = Cast::make(op->type, floor(e + 0.5f)); + } + e.accept(this); + + } else if (op->is_intrinsic(Call::mux)) { + Expr e = lower_mux(op); + e.accept(this); + } else if (op->is_intrinsic(Call::saturating_cast)) { + Expr e = lower_intrinsic(op); + e.accept(this); + + } else if (op->is_intrinsic()) { + Expr lowered = lower_intrinsic(op); + if (lowered.defined()) { + lowered.accept(this); + } else { + internal_error << "Unhandled intrinsic in Vulkan backend: " << op->name << "\n"; + } + + } else if (op->call_type == Call::PureExtern && starts_with(op->name, "pow_f")) { + internal_assert(op->args.size() == 2); + if (can_prove(op->args[0] > 0)) { + visit_glsl_op(GLSLstd450Pow, op->type, op->args); + } else { + Expr x = op->args[0]; + Expr y = op->args[1]; + Halide::Expr abs_x_pow_y = Internal::halide_exp(Internal::halide_log(abs(x)) * y); + Halide::Expr nan_expr = Call::make(x.type(), "nan_f32", {}, Call::PureExtern); + Expr iy = floor(y); + Expr one = make_one(x.type()); + Expr zero = make_zero(x.type()); + Expr e = select(x > 0, abs_x_pow_y, // Strictly positive x + y == 0.0f, one, // x^0 == 1 + x == 0.0f, zero, // 0^y == 0 + y != iy, nan_expr, // negative x to a non-integer power + iy % 2 == 0, abs_x_pow_y, // negative x to an even power + -abs_x_pow_y); // negative x to an odd power + e = common_subexpression_elimination(e); + e.accept(this); + } + } else if (starts_with(op->name, "fast_inverse_f")) { + internal_assert(op->args.size() == 1); + + if (op->type.lanes() > 1) { + user_error << "Vulkan: Expected scalar value for fast_inverse!\n"; + } + + op->args[0].accept(this); + SpvId arg_value_id = builder.current_id(); + + SpvId one_constant_id = SpvInvalidId; + SpvId type_id = builder.declare_type(op->type); + if (op->type.is_float() && op->type.bits() == 16) { + if (op->type.is_bfloat()) { + bfloat16_t one_value = bfloat16_t(1.0f); + one_constant_id = builder.declare_constant(op->type, &one_value); + } else { + float16_t one_value = float16_t(1.0f); + one_constant_id = builder.declare_constant(op->type, &one_value); + } + } else if (op->type.is_float() && op->type.bits() == 32) { + float one_value = float(1.0f); + one_constant_id = builder.declare_constant(op->type, &one_value); + } else if (op->type.is_float() && op->type.bits() == 64) { + double one_value = double(1.0); + one_constant_id = builder.declare_constant(op->type, &one_value); + } else { + internal_error << "Vulkan: Unhandled float type in fast_inverse intrinsic!\n"; + } + internal_assert(one_constant_id != SpvInvalidId); + SpvId result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::binary_op(SpvOpFDiv, type_id, result_id, one_constant_id, arg_value_id)); + builder.update_id(result_id); + } else if (op->name == "nan_f32") { + float value = NAN; + SpvId result_id = builder.declare_constant(Float(32), &value); + builder.update_id(result_id); + } else if (op->name == "inf_f32") { + float value = INFINITY; + SpvId result_id = builder.declare_constant(Float(32), &value); + builder.update_id(result_id); + } else if (op->name == "neg_inf_f32") { + float value = -INFINITY; + SpvId result_id = builder.declare_constant(Float(32), &value); + builder.update_id(result_id); + } else if 
(starts_with(op->name, "is_nan_f")) {
+        internal_assert(op->args.size() == 1);
+        visit_unary_op((SpvOp)SpvOpIsNan, op->type, op->args[0]);
+    } else if (starts_with(op->name, "is_inf_f")) {
+        internal_assert(op->args.size() == 1);
+        visit_unary_op((SpvOp)SpvOpIsInf, op->type, op->args[0]);
+    } else if (starts_with(op->name, "is_finite_f")) {
+
+        internal_assert(op->args.size() == 1);
+        visit_unary_op((SpvOp)SpvOpIsInf, op->type, op->args[0]);
+        SpvId is_inf_id = builder.current_id();
+        visit_unary_op((SpvOp)SpvOpIsNan, op->type, op->args[0]);
+        SpvId is_nan_id = builder.current_id();
+
+        SpvId type_id = builder.declare_type(op->type);
+        SpvId not_is_nan_id = builder.reserve_id(SpvResultId);
+        builder.append(SpvFactory::logical_not(type_id, not_is_nan_id, is_nan_id));
+        SpvId not_is_inf_id = builder.reserve_id(SpvResultId);
+        builder.append(SpvFactory::logical_not(type_id, not_is_inf_id, is_inf_id));
+        SpvId result_id = builder.reserve_id(SpvResultId);
+        builder.append(SpvFactory::logical_and(type_id, result_id, not_is_inf_id, not_is_nan_id));
+        builder.update_id(result_id);
+
+    } else {
+
+        // If it's not a standard SPIR-V built-in, see if there's a GLSL extended builtin
+        BuiltinMap::const_iterator glsl_it = glsl_builtin.find(op->name);
+        if (glsl_it == glsl_builtin.end()) {
+            user_error << "Vulkan: unhandled SPIR-V GLSL builtin function '" << op->name << "' encountered.\n";
+        }
+
+        // Call the GLSL extended built-in
+        SpvId glsl_op_code = glsl_it->second;
+        visit_glsl_op(glsl_op_code, op->type, op->args);
+    }
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Select *op) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Select): " << op->type << " (" << op->condition << ") ? (" << op->true_value << ") : (" << op->false_value << ")\n";
+    SpvId type_id = builder.declare_type(op->type);
+    op->condition.accept(this);
+    SpvId cond_id = builder.current_id();
+    op->true_value.accept(this);
+    SpvId true_id = builder.current_id();
+    op->false_value.accept(this);
+    SpvId false_id = builder.current_id();
+    SpvId result_id = builder.reserve_id(SpvResultId);
+    builder.append(SpvFactory::select(type_id, result_id, cond_id, true_id, false_id));
+    builder.update_id(result_id);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_scalar_index(const Load *op, SpvId index_id, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_scalar_index(): "
+             << "index_id=" << index_id << " "
+             << "variable_id=" << variable_id << " "
+             << "value_type=" << value_type << " "
+             << "storage_type=" << storage_type << " "
+             << "storage_class=" << storage_class << "\n";
+
+    // determine the base type id for the source value
+    SpvId base_type_id = builder.type_of(variable_id);
+    if (builder.is_pointer_type(base_type_id)) {
+        base_type_id = builder.lookup_base_type(base_type_id);
+    }
+
+    SpvId storage_type_id = builder.declare_type(storage_type);
+    SpvId ptr_type_id = builder.declare_pointer_type(storage_type, storage_class);
+
+    uint32_t zero = 0;
+    SpvId src_id = SpvInvalidId;
+    SpvId src_index_id = index_id;
+    if (storage_class == SpvStorageClassUniform) {
+        if (builder.is_struct_type(base_type_id)) {
+            SpvId zero_id = builder.declare_constant(UInt(32), &zero);
+            SpvFactory::Indices access_indices = {zero_id, src_index_id};
+            src_id = builder.declare_access_chain(ptr_type_id, variable_id, access_indices);
+        } else {
+            SpvFactory::Indices access_indices = {src_index_id};
+            src_id =
builder.declare_access_chain(ptr_type_id, variable_id, access_indices); + } + } else if ((storage_class == SpvStorageClassWorkgroup) || (storage_class == SpvStorageClassFunction)) { + if (builder.is_array_type(base_type_id)) { + SpvFactory::Indices access_indices = {src_index_id}; + src_id = builder.declare_access_chain(ptr_type_id, variable_id, access_indices); + } else { + src_id = variable_id; + } + } else { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Load): unhandled storage class encountered on op: " << storage_class << "\n"; + } + internal_assert(src_id != SpvInvalidId); + + SpvId value_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::load(storage_type_id, value_id, src_id)); + + // if the value type doesn't match the base for the pointer type, cast it accordingly + SpvId result_id = value_id; + if (storage_type != value_type) { + result_id = cast_type(value_type, storage_type, result_id); + } + builder.update_id(result_id); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_vector_index(const Load *op, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::load_from_vector_index(): " + << "variable_id=" << variable_id << " " + << "value_type=" << value_type << " " + << "storage_type=" << storage_type << " " + << "storage_class=" << storage_class << "\n"; + + internal_assert(op->index.type().is_vector()); + + // If the runtime array is a vector type, then attempt to do a + // dense vector load by using the base of the ramp divided by + // the number of lanes. + StorageAccessMap::const_iterator it = storage_access_map.find(variable_id); + if (it != storage_access_map.end()) { + storage_type = it->second.storage_type; // use the storage type for the runtime array + SpvId storage_type_id = it->second.storage_type_id; + if (builder.is_vector_type(storage_type_id)) { + Expr ramp_base = strided_ramp_base(op->index); + if (ramp_base.defined()) { + Expr ramp_index = (ramp_base / op->type.lanes()); + ramp_index.accept(this); + SpvId index_id = builder.current_id(); + load_from_scalar_index(op, index_id, variable_id, value_type, storage_type, storage_class); + return; + } + } + } + + op->index.accept(this); + SpvId index_id = builder.current_id(); + + // Gather vector elements. 
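+    // (Each lane gets its own access chain and scalar OpLoad; the per-lane
+    // results are recombined into a single vector with OpCompositeConstruct.)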
+ SpvFactory::Components loaded_values; + Type scalar_value_type = value_type.with_lanes(1); + SpvFactory::Components index_components = split_vector(op->index.type(), index_id); + for (SpvId scalar_index : index_components) { + load_from_scalar_index(op, scalar_index, variable_id, scalar_value_type, storage_type, storage_class); + SpvId value_component_id = builder.current_id(); + loaded_values.push_back(value_component_id); + } + + // Create a composite vector from the individual loads + if (loaded_values.size() > 1) { + SpvId result_id = join_vector(value_type, loaded_values); + builder.update_id(result_id); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::store_at_scalar_index(const Store *op, SpvId index_id, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class, SpvId value_id) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::store_at_scalar_index(): " + << "index_id=" << index_id << " " + << "variable_id=" << variable_id << " " + << "value_type=" << value_type << " " + << "storage_type=" << storage_type << " " + << "storage_class=" << storage_class << " " + << "value_id=" << value_id << "\n"; + + // determine the base type id for the source value + SpvId base_type_id = builder.type_of(variable_id); + if (builder.is_pointer_type(base_type_id)) { + base_type_id = builder.lookup_base_type(base_type_id); + } + + uint32_t zero = 0; + SpvId dst_id = SpvInvalidId; + SpvId dst_index_id = index_id; + + SpvId ptr_type_id = builder.declare_pointer_type(storage_type, storage_class); + if (storage_class == SpvStorageClassUniform) { + if (builder.is_struct_type(base_type_id)) { + SpvId zero_id = builder.declare_constant(UInt(32), &zero); + SpvFactory::Indices access_indices = {zero_id, dst_index_id}; + dst_id = builder.declare_access_chain(ptr_type_id, variable_id, access_indices); + } else { + SpvFactory::Indices access_indices = {dst_index_id}; + dst_id = builder.declare_access_chain(ptr_type_id, variable_id, access_indices); + } + } else if ((storage_class == SpvStorageClassWorkgroup) || (storage_class == SpvStorageClassFunction)) { + if (builder.is_array_type(base_type_id)) { + SpvFactory::Indices access_indices = {dst_index_id}; + dst_id = builder.declare_access_chain(ptr_type_id, variable_id, access_indices); + } else { + dst_id = variable_id; + } + } else { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Store): unhandled storage class encountered on op: " << storage_class << "\n"; + } + internal_assert(dst_id != SpvInvalidId); + + // if the value type doesn't match the base for the pointer type, cast it accordingly + if (storage_type != value_type) { + value_id = cast_type(storage_type, value_type, value_id); + } + + builder.append(SpvFactory::store(dst_id, value_id)); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::store_at_vector_index(const Store *op, SpvId variable_id, Type value_type, Type storage_type, SpvStorageClass storage_class, SpvId value_id) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::store_at_vector_index(): " + << "variable_id=" << variable_id << " " + << "value_type=" << value_type << " " + << "storage_type=" << storage_type << " " + << "storage_class=" << storage_class << "\n"; + + internal_assert(op->index.type().is_vector()); + + // If the runtime array is a vector type, then attempt to do a + // dense vector store by using the base of the ramp divided by + // the number of lanes. 
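+    // (For example, a dense Ramp(base=8, stride=1, lanes=4) stored into a vec4
+    // runtime array becomes a single store at scalar index 8 / 4 = 2.)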
+ StorageAccessMap::const_iterator it = storage_access_map.find(variable_id); + if (it != storage_access_map.end()) { + storage_type = it->second.storage_type; + SpvId storage_type_id = it->second.storage_type_id; + if (builder.is_vector_type(storage_type_id)) { + Expr ramp_base = strided_ramp_base(op->index); + if (ramp_base.defined()) { + Expr ramp_index = (ramp_base / op->value.type().lanes()); + ramp_index.accept(this); + SpvId index_id = builder.current_id(); + store_at_scalar_index(op, index_id, variable_id, value_type, storage_type, storage_class, value_id); + return; + } + } + } + + op->index.accept(this); + SpvId index_id = builder.current_id(); + + // Split vector value into components + internal_assert(op->index.type().lanes() <= op->value.type().lanes()); + SpvFactory::Components value_components = split_vector(op->value.type(), value_id); + SpvFactory::Components index_components = split_vector(op->index.type(), index_id); + + // Scatter vector elements. + Type scalar_value_type = op->value.type().with_lanes(1); + for (uint32_t i = 0; i < index_components.size(); i++) { + SpvId index_component_id = index_components[i]; + SpvId value_component_id = value_components[i]; + store_at_scalar_index(op, index_component_id, variable_id, scalar_value_type, storage_type, storage_class, value_component_id); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Load *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Load): " << op->type << " " << op->name << "[" << op->index << "]\n"; + user_assert(is_const_one(op->predicate)) << "Predicated loads not supported by SPIR-V codegen\n"; + + // Construct the pointer to read from + internal_assert(symbol_table.contains(op->name)); + SymbolIdStorageClassPair id_and_storage_class = symbol_table.get(op->name); + SpvId variable_id = id_and_storage_class.first; + SpvStorageClass storage_class = id_and_storage_class.second; + internal_assert(variable_id != SpvInvalidId); + internal_assert(((uint32_t)storage_class) < ((uint32_t)SpvStorageClassMax)); + + // If this is a load from a buffer block (mapped to a halide buffer) or + // GPU shared memory, the pointer type must match the declared storage + // type for the runtime array. 
+ Type value_type = op->type; + Type storage_type = value_type; + StorageAccessMap::const_iterator it = storage_access_map.find(variable_id); + if (it != storage_access_map.end()) { + storage_type = it->second.storage_type; + } + + debug(2) << " value_type=" << op->type << " storage_type=" << storage_type << "\n"; + debug(2) << " index_type=" << op->index.type() << " index=" << op->index << "\n"; + + if (op->index.type().is_scalar()) { + op->index.accept(this); + SpvId index_id = builder.current_id(); + load_from_scalar_index(op, index_id, variable_id, value_type, storage_type, storage_class); + } else { + load_from_vector_index(op, variable_id, value_type, storage_type, storage_class); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Store *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Store): " << op->name << "[" << op->index << "] = (" << op->value << ")\n"; + user_assert(is_const_one(op->predicate)) << "Predicated stores not supported by SPIR-V codegen!\n"; + + debug(2) << " value_type=" << op->value.type() << " value=" << op->value << "\n"; + op->value.accept(this); + SpvId value_id = builder.current_id(); + + internal_assert(symbol_table.contains(op->name)); + SymbolIdStorageClassPair id_and_storage_class = symbol_table.get(op->name); + SpvId variable_id = id_and_storage_class.first; + SpvStorageClass storage_class = id_and_storage_class.second; + internal_assert(variable_id != SpvInvalidId); + internal_assert(((uint32_t)storage_class) < ((uint32_t)SpvStorageClassMax)); + + Type value_type = op->value.type(); + Type storage_type = value_type; + + // If this is a store to a buffer block (mapped to a halide buffer) or + // GPU shared memory, the pointer type must match the declared storage + // type for the runtime array + StorageAccessMap::const_iterator it = storage_access_map.find(variable_id); + if (it != storage_access_map.end()) { + storage_type = it->second.storage_type; + } + + debug(2) << " value_type=" << value_type << " storage_type=" << storage_type << "\n"; + debug(2) << " index_type=" << op->index.type() << " index=" << op->index << "\n"; + if (op->index.type().is_scalar()) { + op->index.accept(this); + SpvId index_id = builder.current_id(); + store_at_scalar_index(op, index_id, variable_id, value_type, storage_type, storage_class, value_id); + } else { + store_at_vector_index(op, variable_id, value_type, storage_type, storage_class, value_id); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Let *let) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Let): " << (Expr)let << "\n"; + let->value.accept(this); + SpvId current_id = builder.current_id(); + ScopedSymbolBinding binding(symbol_table, let->name, {current_id, SpvStorageClassFunction}); + let->body.accept(this); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const LetStmt *let) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(LetStmt): " << let->name << "\n"; + let->value.accept(this); + SpvId current_id = builder.current_id(); + ScopedSymbolBinding binding(symbol_table, let->name, {current_id, SpvStorageClassFunction}); + let->body.accept(this); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const AssertStmt *stmt) { + // TODO: Fill this in. 
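+    // For now, device-side asserts are effectively no-ops in the Vulkan
+    // backend: the condition and message are only traced at debug level.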
+ debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(AssertStmt): " + << "condition=" << stmt->condition << " " + << "message=" << stmt->message << "\n"; +} + +namespace { +std::pair simt_intrinsic(const std::string &name) { + if (ends_with(name, ".__thread_id_x")) { + return {"LocalInvocationId", 0}; + } else if (ends_with(name, ".__thread_id_y")) { + return {"LocalInvocationId", 1}; + } else if (ends_with(name, ".__thread_id_z")) { + return {"LocalInvocationId", 2}; + } else if (ends_with(name, ".__block_id_x")) { + return {"WorkgroupId", 0}; + } else if (ends_with(name, ".__block_id_y")) { + return {"WorkgroupId", 1}; + } else if (ends_with(name, ".__block_id_z")) { + return {"WorkgroupId", 2}; + } else if (ends_with(name, "id_w")) { + user_error << "Vulkan only supports <=3 dimensions for gpu blocks"; + } + internal_error << "simt_intrinsic called on bad variable name: " << name << "\n"; + return {"", -1}; +} + +} // anonymous namespace + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const For *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(For): name=" << op->name << " min=" << op->min << " extent=" << op->extent << "\n"; + + if (is_gpu_var(op->name)) { + internal_assert((op->for_type == ForType::GPUBlock) || + (op->for_type == ForType::GPUThread)) + << "kernel loops must be either gpu block or gpu thread\n"; + + // This should always be true at this point in codegen + internal_assert(is_const_zero(op->min)); + auto intrinsic = simt_intrinsic(op->name); + const std::string intrinsic_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + intrinsic.first; + + // Intrinsics are inserted when adding the kernel + internal_assert(symbol_table.contains(intrinsic_var_name)); + SpvId intrinsic_id = symbol_table.get(intrinsic_var_name).first; + SpvStorageClass storage_class = symbol_table.get(intrinsic_var_name).second; + + // extract and cast to the extent type (which is what's expected by Halide's for loops) + Type unsigned_type = UInt(32); + SpvId unsigned_type_id = builder.declare_type(unsigned_type); + SpvId unsigned_value_id = builder.reserve_id(SpvResultId); + SpvFactory::Indices indices = {intrinsic.second}; + builder.append(SpvFactory::composite_extract(unsigned_type_id, unsigned_value_id, intrinsic_id, indices)); + SpvId intrinsic_value_id = cast_type(op->min.type(), unsigned_type, unsigned_value_id); + { + ScopedSymbolBinding binding(symbol_table, op->name, {intrinsic_value_id, storage_class}); + op->body.accept(this); + } + } else { + + debug(2) << " (serial for loop): min=" << op->min << " extent=" << op->extent << "\n"; + + internal_assert(op->for_type == ForType::Serial) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit unhandled For type: " << op->for_type << "\n"; + user_assert(op->min.type() == op->extent.type()); + user_assert(op->min.type().is_int() || op->min.type().is_uint()); + + op->min.accept(this); + SpvId min_id = builder.current_id(); + op->extent.accept(this); + SpvId extent_id = builder.current_id(); + + // Compute max. 
+ Type index_type = op->min.type(); + SpvId index_type_id = builder.declare_type(index_type); + SpvStorageClass storage_class = SpvStorageClassFunction; + SpvId index_var_type_id = builder.declare_pointer_type(index_type_id, storage_class); + SpvId max_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::integer_add(index_type_id, max_id, min_id, extent_id)); + + // Declare loop var + const std::string loop_var_name = unique_name(std::string("k") + std::to_string(kernel_index) + "_loop_idx"); + debug(2) << " loop_index=" << loop_var_name << " type=" << index_type << "\n"; + SpvId loop_var_id = builder.declare_variable(loop_var_name, index_var_type_id, storage_class); + symbol_table.push(loop_var_name, {loop_var_id, storage_class}); + + SpvId header_block_id = builder.reserve_id(SpvBlockId); + SpvId top_block_id = builder.reserve_id(SpvBlockId); + SpvId body_block_id = builder.reserve_id(SpvBlockId); + SpvId continue_block_id = builder.reserve_id(SpvBlockId); + SpvId merge_block_id = builder.reserve_id(SpvBlockId); + + builder.append(SpvFactory::store(loop_var_id, min_id)); + SpvBlock header_block = builder.create_block(header_block_id); + builder.enter_block(header_block); + { + builder.append(SpvFactory::loop_merge(merge_block_id, continue_block_id, SpvLoopControlDontUnrollMask)); + builder.append(SpvFactory::branch(top_block_id)); + } + builder.leave_block(); + + SpvId loop_index_id = builder.reserve_id(SpvResultId); + SpvBlock top_block = builder.create_block(top_block_id); + builder.enter_block(top_block); + { + SpvId loop_test_type_id = builder.declare_type(Bool()); + SpvId loop_test_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::load(index_type_id, loop_index_id, loop_var_id)); + builder.append(SpvFactory::integer_less_than(loop_test_type_id, loop_test_id, loop_index_id, max_id, index_type.is_uint())); + builder.append(SpvFactory::conditional_branch(loop_test_id, body_block_id, merge_block_id)); + } + builder.leave_block(); + + SpvBlock body_block = builder.create_block(body_block_id); + builder.enter_block(body_block); + { + ScopedSymbolBinding binding(symbol_table, op->name, {loop_index_id, storage_class}); + op->body.accept(this); + builder.append(SpvFactory::branch(continue_block_id)); + } + builder.leave_block(); + + SpvBlock continue_block = builder.create_block(continue_block_id); + builder.enter_block(continue_block); + { + // Update loop variable + int32_t one = 1; + SpvId next_index_id = builder.reserve_id(SpvResultId); + SpvId constant_one_id = builder.declare_constant(index_type, &one); + SpvId current_index_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::load(index_type_id, current_index_id, loop_var_id)); + builder.append(SpvFactory::integer_add(index_type_id, next_index_id, current_index_id, constant_one_id)); + builder.append(SpvFactory::store(loop_var_id, next_index_id)); + builder.append(SpvFactory::branch(header_block_id)); + } + builder.leave_block(); + symbol_table.pop(loop_var_name); + + SpvBlock merge_block = builder.create_block(merge_block_id); + builder.enter_block(merge_block); + } +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Ramp *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Ramp): " + << "base=" << op->base << " " + << "stride=" << op->stride << " " + << "lanes=" << (uint32_t)op->lanes << "\n"; + + // TODO: Is there a way to do this that doesn't require duplicating lane values? 
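+    // (One possible alternative: broadcast the base and stride into vectors and
+    // add a constant {0, 1, ..., lanes-1} vector with a single per-lane multiply
+    // and add; the chain of scalar adds below keeps the logic simple.)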
+ SpvId base_type_id = builder.declare_type(op->base.type()); + SpvId type_id = builder.declare_type(op->type); + op->base.accept(this); + SpvId base_id = builder.current_id(); + op->stride.accept(this); + SpvId stride_id = builder.current_id(); + + // Generate adds to make the elements of the ramp. + SpvId prev_id = base_id; + SpvFactory::Components constituents = {base_id}; + for (int i = 1; i < op->lanes; i++) { + SpvId this_id = builder.reserve_id(SpvResultId); + if (op->base.type().is_float()) { + builder.append(SpvFactory::float_add(base_type_id, this_id, prev_id, stride_id)); + } else if (op->base.type().is_int_or_uint()) { + builder.append(SpvFactory::integer_add(base_type_id, this_id, prev_id, stride_id)); + } else { + internal_error << "SPIRV: Unhandled base type encountered in ramp!\n"; + } + constituents.push_back(this_id); + prev_id = this_id; + } + + SpvId result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::composite_construct(type_id, result_id, constituents)); + builder.update_id(result_id); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Broadcast *op) { + debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Broadcast): " + << "type=" << op->type << " " + << "value=" << op->value << "\n"; + + // TODO: Is there a way to do this that doesn't require duplicating lane values? + SpvId type_id = builder.declare_type(op->type); + op->value.accept(this); + SpvId value_id = builder.current_id(); + SpvId result_id = builder.reserve_id(SpvResultId); + + SpvFactory::Components constituents; + constituents.insert(constituents.end(), op->lanes, value_id); + builder.append(SpvFactory::composite_construct(type_id, result_id, constituents)); + builder.update_id(result_id); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Provide *) { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Provide *): Provide encountered during codegen\n"; +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Allocate *op) { + + SpvId storage_type_id = builder.declare_type(op->type); + SpvId array_type_id = SpvInvalidId; + SpvId variable_id = SpvInvalidId; + uint32_t array_size = 0; + + SpvStorageClass storage_class = SpvStorageClassGeneric; + if (op->memory_type == MemoryType::GPUShared) { + + // Allocation of shared memory must be declared at global scope + storage_class = SpvStorageClassWorkgroup; // shared across workgroup + std::string variable_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + op->name; + uint32_t type_size = op->type.bytes(); + uint32_t constant_id = 0; + + // static fixed size allocation + if (op->extents.size() == 1 && is_const(op->extents[0])) { + array_size = op->constant_allocation_size(); + array_type_id = builder.declare_type(op->type, array_size); + builder.add_symbol(variable_name + "_array_type", array_type_id, builder.current_module().id()); + debug(2) << "Vulkan: Allocate (fixed-size) " << op->name << " type=" << op->type << " array_size=" << (uint32_t)array_size << " in shared memory on device in global scope\n"; + + } else { + // dynamic allocation with unknown size at compile time ... 
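+            // The array length is emitted as a SPIR-V specialization constant so
+            // the runtime can override it at pipeline creation time. A
+            // hypothetical host-side sketch (not part of this file) would bind
+            // the override like:
+            //
+            //   VkSpecializationMapEntry entry = {constant_id, /*offset*/ 0, sizeof(uint32_t)};
+            //   VkSpecializationInfo info = {1, &entry, sizeof(uint32_t), &actual_array_size};
+            //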
+ + // declare the array size as a specialization constant (which will get overridden at runtime) + Type array_size_type = UInt(32); + array_size = std::max(workgroup_size[0], uint32_t(1)); // use one item per workgroup as an initial guess + SpvId array_size_id = builder.declare_specialization_constant(array_size_type, &array_size); + array_type_id = builder.add_array_with_default_size(storage_type_id, array_size_id); + builder.add_symbol(variable_name + "_array_type", array_type_id, builder.current_module().id()); + + debug(2) << "Vulkan: Allocate (dynamic size) " << op->name << " type=" << op->type << " default_size=" << (uint32_t)array_size << " in shared memory on device in global scope\n"; + + // bind the specialization constant to the next slot + std::string constant_name = variable_name + "_array_size"; + constant_id = (uint32_t)(descriptor_set_table.back().specialization_constants.size() + 1); + SpvBuilder::Literals spec_id = {constant_id}; + builder.add_annotation(array_size_id, SpvDecorationSpecId, spec_id); + builder.add_symbol(constant_name, array_size_id, builder.current_module().id()); + + // update the descriptor set with the specialization binding + SpecializationBinding spec_binding = {constant_id, (uint32_t)array_size_type.bytes(), constant_name}; + descriptor_set_table.back().specialization_constants.push_back(spec_binding); + } + + // add the shared memory allocation to the descriptor set + SharedMemoryAllocation shared_mem_allocation = {constant_id, array_size, type_size, variable_name}; + descriptor_set_table.back().shared_memory_usage.push_back(shared_mem_allocation); + + // declare the variable + SpvId ptr_type_id = builder.declare_pointer_type(array_type_id, storage_class); + variable_id = builder.declare_global_variable(variable_name, ptr_type_id, storage_class); + + } else { + + // Allocation is not a shared memory allocation, just make a local declaration. + array_size = op->constant_allocation_size(); + + // It must have a constant size. + user_assert(array_size > 0) + << "Allocation " << op->name << " has a dynamic size. 
" + << "Only fixed-size local allocations are supported with Vulkan."; + + debug(2) << "Vulkan: Allocate " << op->name << " type=" << op->type << " size=" << (uint32_t)array_size << " on device in function scope\n"; + + array_type_id = builder.declare_type(op->type, array_size); + storage_class = SpvStorageClassFunction; // function scope + std::string variable_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + op->name; + SpvId ptr_type_id = builder.declare_pointer_type(array_type_id, storage_class); + variable_id = builder.declare_variable(variable_name, ptr_type_id, storage_class); + } + + StorageAccess access; + access.storage_class = storage_class; + access.storage_array_size = array_size; + access.storage_type_id = storage_type_id; + access.storage_type = op->type; + storage_access_map[variable_id] = access; + + debug(3) << "Vulkan: Pushing allocation called " << op->name << " onto the symbol table\n"; + symbol_table.push(op->name, {variable_id, storage_class}); + op->body.accept(this); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Free *op) { + debug(3) << "Vulkan: Popping allocation called " << op->name << " off the symbol table\n"; + internal_assert(symbol_table.contains(op->name)); + SpvId variable_id = symbol_table.get(op->name).first; + storage_access_map.erase(variable_id); + symbol_table.pop(op->name); +} + +void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Realize *) { + internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Realize *): Realize encountered during codegen\n"; +} + +template +SpvFactory::BlockVariables +CodeGen_Vulkan_Dev::SPIRV_Emitter::emit_if_then_else(const Expr &condition, + StmtOrExpr then_case, StmtOrExpr else_case) { + + SpvId merge_block_id = builder.reserve_id(SpvBlockId); + SpvId if_block_id = builder.reserve_id(SpvBlockId); + SpvId then_block_id = builder.reserve_id(SpvBlockId); + SpvId else_block_id = else_case.defined() ? 
+
+    SpvFactory::BlockVariables block_vars;
+
+    // If block
+    debug(2) << "Vulkan: If => (" << condition << " )\n";
+    SpvBlock if_block = builder.create_block(if_block_id);
+    builder.enter_block(if_block);
+    {
+        condition.accept(this);
+        SpvId cond_id = builder.current_id();
+        builder.append(SpvFactory::selection_merge(merge_block_id, SpvSelectionControlMaskNone));
+        builder.append(SpvFactory::conditional_branch(cond_id, then_block_id, else_block_id));
+    }
+    builder.leave_block();
+
+    // Then block
+    debug(2) << "Vulkan: Then =>\n"
+             << then_case << "\n";
+    SpvBlock then_block = builder.create_block(then_block_id);
+    builder.enter_block(then_block);
+    {
+        then_case.accept(this);
+        SpvId then_id = builder.current_id();
+        builder.append(SpvFactory::branch(merge_block_id));
+        block_vars.push_back({then_id, then_block_id});
+    }
+    builder.leave_block();
+
+    // Else block (optional)
+    if (else_case.defined()) {
+        debug(2) << "Vulkan: Else =>\n"
+                 << else_case << "\n";
+        SpvBlock else_block = builder.create_block(else_block_id);
+        builder.enter_block(else_block);
+        {
+            else_case.accept(this);
+            SpvId else_id = builder.current_id();
+            builder.append(SpvFactory::branch(merge_block_id));
+            block_vars.push_back({else_id, else_block_id});
+        }
+        builder.leave_block();
+    }
+
+    // Merge block
+    SpvBlock merge_block = builder.create_block(merge_block_id);
+    builder.enter_block(merge_block);
+    return block_vars;
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const IfThenElse *op) {
+    if (!builder.current_function().is_defined()) {
+        user_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const IfThenElse *op): No active function for building!\n";
+    }
+    emit_if_then_else(op->condition, op->then_case, op->else_case);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Evaluate *op) {
+    op->value.accept(this);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Shuffle *op) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Shuffle): "
+             << "type=" << op->type << " "
+             << "vectors=" << (uint32_t)op->vectors.size() << " "
+             << "is_interleave=" << (op->is_interleave() ? "true" : "false") << " "
+             << "is_extract_element=" << (op->is_extract_element() ? "true" : "false") << "\n";
"true" : "false") << "\n"; + + // Traverse all the arg vectors + uint32_t arg_idx = 0; + SpvFactory::Operands arg_ids; + arg_ids.reserve(op->vectors.size()); + for (const Expr &e : op->vectors) { + debug(2) << " CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(Shuffle): Arg[" << arg_idx++ << "] => " << e << "\n"; + e.accept(this); + arg_ids.push_back(builder.current_id()); + } + + if (op->is_interleave()) { + int op_lanes = op->type.lanes(); + internal_assert(!arg_ids.empty()); + int arg_lanes = op->vectors[0].type().lanes(); + + std::cout << " vector interleave x" << (uint32_t)op->vectors.size() << " : "; + for (int idx : op->indices) { + std::cout << idx << " "; + } + std::cout << "\n"; + + if (arg_ids.size() == 1) { + + // 1 argument, just do a simple assignment via a cast + SpvId result_id = cast_type(op->type, op->vectors[0].type(), arg_ids[0]); + builder.update_id(result_id); + + } else if (arg_ids.size() == 2) { + + // 2 arguments, use a composite insert to update even and odd indices + uint32_t even_idx = 0; + uint32_t odd_idx = 1; + SpvFactory::Indices even_indices; + SpvFactory::Indices odd_indices; + for (int i = 0; i < op_lanes; ++i) { + even_indices.push_back(even_idx); + odd_indices.push_back(odd_idx); + even_idx += 2; + odd_idx += 2; + } + + SpvId type_id = builder.declare_type(op->type); + SpvId value_id = builder.declare_null_constant(op->type); + SpvId partial_id = builder.reserve_id(SpvResultId); + SpvId result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::composite_insert(type_id, partial_id, arg_ids[0], value_id, even_indices)); + builder.append(SpvFactory::composite_insert(type_id, result_id, arg_ids[1], partial_id, odd_indices)); + builder.update_id(result_id); + + } else { + // 3+ arguments, shuffle via a vector literal + // selecting the appropriate elements of the vectors + int num_vectors = (int)op->vectors.size(); + std::vector vector_component_ids(num_vectors); + for (uint32_t i = 0; i < (uint32_t)arg_ids.size(); ++i) { + if (op->vectors[i].type().is_vector()) { + vector_component_ids[i] = split_vector(op->vectors[i].type(), arg_ids[i]); + } else { + vector_component_ids[i] = {arg_ids[i]}; + } + } + + SpvFactory::Components result_component_ids(op_lanes); + for (int i = 0; i < op_lanes; i++) { + int arg = i % num_vectors; + int arg_idx = i / num_vectors; + internal_assert(arg_idx <= arg_lanes); + result_component_ids[i] = vector_component_ids[arg][arg_idx]; + } + + SpvId result_id = join_vector(op->type, result_component_ids); + builder.update_id(result_id); + } + } else if (op->is_extract_element()) { + int idx = op->indices[0]; + internal_assert(idx >= 0); + internal_assert(idx <= op->vectors[0].type().lanes()); + if (op->vectors[0].type().is_vector()) { + SpvFactory::Indices indices = {(uint32_t)idx}; + SpvId type_id = builder.declare_type(op->type); + SpvId result_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::composite_extract(type_id, result_id, arg_ids[0], indices)); + builder.update_id(result_id); + } else { + SpvId result_id = cast_type(op->type, op->vectors[0].type(), arg_ids[0]); + builder.update_id(result_id); + } + } else if (op->type.is_scalar()) { + // Deduce which vector we need. Apparently it's not required + // that all vectors have identical lanes, so a loop is required. + // Since idx of -1 means "don't care", we'll treat it as 0 to simplify. 
+        SpvId result_id = SpvInvalidId;
+        int idx = std::max(0, op->indices[0]);
+        for (size_t vec_idx = 0; vec_idx < op->vectors.size(); vec_idx++) {
+            const int vec_lanes = op->vectors[vec_idx].type().lanes();
+            if (idx < vec_lanes) {
+                if (op->vectors[vec_idx].type().is_vector()) {
+                    SpvFactory::Indices indices = {(uint32_t)idx};
+                    SpvId type_id = builder.declare_type(op->type);
+                    result_id = builder.reserve_id(SpvResultId);
+                    builder.append(SpvFactory::composite_extract(type_id, result_id, arg_ids[vec_idx], indices));
+                } else {
+                    result_id = arg_ids[vec_idx];
+                }
+                break;
+            }
+            idx -= vec_lanes;
+        }
+        builder.update_id(result_id);
+
+    } else {
+
+        // vector shuffle ... not interleaving
+        int op_lanes = op->type.lanes();
+        int num_vectors = (int)op->vectors.size();
+
+        debug(2) << "  vector shuffle x" << num_vectors << " : ";
+        for (int idx : op->indices) {
+            debug(2) << idx << " ";
+        }
+        debug(2) << "\n";
+
+        if (num_vectors == 1) {
+            // 1 argument, just do a simple assignment via a cast
+            SpvId result_id = cast_type(op->type, op->vectors[0].type(), arg_ids[0]);
+            builder.update_id(result_id);
+
+        } else if (num_vectors == 2) {
+
+            // 2 arguments, use the builtin vector shuffle that takes a pair of vectors
+            SpvFactory::Indices indices;
+            indices.reserve(op->indices.size());
+            indices.insert(indices.end(), op->indices.begin(), op->indices.end());
+            SpvId type_id = builder.declare_type(op->type);
+            SpvId result_id = builder.reserve_id(SpvResultId);
+            builder.append(SpvFactory::vector_shuffle(type_id, result_id, arg_ids[0], arg_ids[1], indices));
+            builder.update_id(result_id);
+        } else {
+            std::vector<SpvFactory::Components> vector_component_ids(num_vectors);
+            for (uint32_t i = 0; i < (uint32_t)arg_ids.size(); ++i) {
+                if (op->vectors[i].type().is_vector()) {
+                    vector_component_ids[i] = split_vector(op->vectors[i].type(), arg_ids[i]);
+                } else {
+                    vector_component_ids[i] = {arg_ids[i]};
+                }
+            }
+
+            SpvFactory::Components result_component_ids(op_lanes);
+            for (int i = 0; i < op_lanes && i < (int)op->indices.size(); i++) {
+                int idx = op->indices[i];
+                int arg = idx % num_vectors;
+                int arg_idx = idx / num_vectors;
+                internal_assert(arg_idx < (int)vector_component_ids[arg].size());
+                result_component_ids[i] = vector_component_ids[arg][arg_idx];
+            }
+
+            SpvId result_id = join_vector(op->type, result_component_ids);
+            builder.update_id(result_id);
+        }
+    }
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const VectorReduce *) {
+    internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const VectorReduce *): VectorReduce not implemented for codegen\n";
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Prefetch *) {
+    internal_error << "CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Prefetch *): Prefetch not implemented for codegen\n";
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Fork *) {
+    internal_error << "void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Fork *): Fork not implemented for codegen";
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Acquire *) {
+    internal_error << "void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Acquire *): Acquire not implemented for codegen";
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Atomic *) {
+    internal_error << "void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit(const Atomic *): Atomic not implemented for codegen";
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit_unary_op(SpvOp op_code, Type t, const Expr &a) {
+    SpvId type_id = builder.declare_type(t);
+    a.accept(this);
+    SpvId src_a_id = builder.current_id();
+
+    SpvId result_id = builder.reserve_id(SpvResultId);
+    builder.append(SpvFactory::unary_op(op_code, type_id, result_id, src_a_id));
+    builder.update_id(result_id);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit_binary_op(SpvOp op_code, Type t, const Expr &a, const Expr &b) {
+    SpvId type_id = builder.declare_type(t);
+    a.accept(this);
+    SpvId src_a_id = builder.current_id();
+    b.accept(this);
+    SpvId src_b_id = builder.current_id();
+
+    SpvId result_id = builder.reserve_id(SpvResultId);
+    builder.append(SpvFactory::binary_op(op_code, type_id, result_id, src_a_id, src_b_id));
+    builder.update_id(result_id);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::visit_glsl_op(SpvId glsl_op_code, Type type, const std::vector<Expr> &args) {
+    SpvId type_id = builder.declare_type(type);
+
+    SpvFactory::Operands operands;
+    operands.reserve(args.size());
+    for (const Expr &e : args) {
+        e.accept(this);
+        SpvId arg_value_id = builder.current_id();
+        if (builder.type_of(arg_value_id) != type_id) {
+            SpvId casted_value_id = cast_type(type, e.type(), arg_value_id);  // all GLSL args must match return type
+            operands.push_back(casted_value_id);
+        } else {
+            operands.push_back(arg_value_id);
+        }
+    }
+
+    // sanity check the expected number of operands
+    internal_assert(glsl_operand_count(glsl_op_code) == operands.size());
+
+    SpvId inst_set_id = builder.import_glsl_intrinsics();
+    SpvId result_id = builder.reserve_id(SpvResultId);
+    builder.append(SpvFactory::extended(inst_set_id, glsl_op_code, type_id, result_id, operands));
+    builder.update_id(result_id);
+}
+
+SpvFactory::Components CodeGen_Vulkan_Dev::SPIRV_Emitter::split_vector(Type type, SpvId value_id) {
+    SpvFactory::Components value_components;
+    SpvId scalar_value_type_id = builder.declare_type(type.with_lanes(1));
+    for (uint32_t i = 0; i < (uint32_t)type.lanes(); i++) {
+        SpvFactory::Indices extract_indices = {i};
+        SpvId value_component_id = builder.reserve_id(SpvResultId);
+        builder.append(SpvFactory::composite_extract(scalar_value_type_id, value_component_id, value_id, extract_indices));
+        value_components.push_back(value_component_id);
+    }
+    return value_components;
+}
+
+SpvId CodeGen_Vulkan_Dev::SPIRV_Emitter::join_vector(Type type, const SpvFactory::Components &value_components) {
+    SpvId type_id = builder.declare_type(type);
+    SpvId result_id = builder.reserve_id(SpvResultId);
+    builder.append(SpvFactory::composite_construct(type_id, result_id, value_components));
+    return result_id;
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::reset() {
+    kernel_index = 0;
+    builder.reset();
+    SymbolScope empty;
+    symbol_table.swap(empty);
+    storage_access_map.clear();
+    descriptor_set_table.clear();
+    reset_workgroup_size();
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::init_module() {
+    reset();
+
+    if (target.has_feature(Target::VulkanV13)) {
+        // Encode to SPIR-V v1.2, which is required for the LocalSizeId
+        // execution mode used for dynamic workgroup sizes (see
+        // declare_workgroup_size() below)
+        builder.set_version_format(0x00010200);
+    } else {
+        // Encode to SPIR-V v1.0 (which is the only format supported by Vulkan v1.0)
+        builder.set_version_format(0x00010000);
+    }
+
+    // NOTE: Source language is irrelevant. We encode the binary directly
+    builder.set_source_language(SpvSourceLanguageUnknown);
+
+    // TODO: Should we autodetect and/or force 32bit or 64bit?
+    builder.set_addressing_model(SpvAddressingModelLogical);
+
+    // TODO: Should we autodetect the VulkanMemoryModel extension and use that instead?
+    builder.set_memory_model(SpvMemoryModelGLSL450);
+
+    // NOTE: Execution model for Vulkan must be GLCompute which requires Shader support
+    builder.require_capability(SpvCapabilityShader);
+
+    // NOTE: Extensions are handled in finalize
+}
+
+namespace {
+
+std::vector<char> encode_header_string(const std::string &str) {
+    uint32_t padded_word_count = (str.length() / 4) + 1;  // add an extra entry to ensure strings are terminated
+    uint32_t padded_str_length = padded_word_count * 4;
+    std::vector<char> encoded_string(padded_str_length, '\0');
+    for (uint32_t c = 0; c < str.length(); c++) {
+        encoded_string[c] = str[c];
+    }
+    return encoded_string;
+}
+
+} // namespace
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::encode_header(SpvBinary &spirv_header) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::encode_header\n";
+
+    // Encode a sidecar for the module that lists the descriptor sets
+    // corresponding to each entry point contained in the module.
+    //
+    // This metadata will be used at runtime to define the shader bindings
+    // needed for all buffers, constants, shared memory, and workgroup sizes
+    // that are required for execution.
+    //
+    // Like the SPIR-V code module, each entry is one word (1x uint32_t).
+    // Variable-length sections are prefixed with their length (i.e. number of entries).
+    //
+    // [0] Header word count (total length of header)
+    // [1] Number of descriptor sets
+    // ... For each descriptor set ...
+    // ... [0] Length of entry point name (padded to nearest word size)
+    // ....... [*] Entry point string data (padded with null chars)
+    // ... [1] Number of uniform buffers for this descriptor set
+    // ... [2] Number of storage buffers for this descriptor set
+    // ... [3] Number of specialization constants for this descriptor set
+    // ....... For each specialization constant ...
+    // ....... [0] Length of constant name string (padded to nearest word size)
+    // ........... [*] Constant name string data (padded with null chars)
+    // ....... [1] Constant id (as used in VkSpecializationMapEntry for binding)
+    // ....... [2] Size of data type (in bytes)
+    // ... [4] Number of shared memory allocations for this descriptor set
+    // ....... For each allocation ...
+    // ....... [0] Length of variable name string (padded to nearest word size)
+    // ........... [*] Variable name string data (padded with null chars)
+    // ....... [1] Constant id to use for overriding array size (zero if it is not bound to a specialization constant)
+    // ....... [2] Size of data type (in bytes)
+    // ....... [3] Size of array (i.e. element count)
+    // ... [5] Dynamic workgroup dimensions bound to specialization constants
+    // ....... [0] Constant id to use for local_size_x (zero if it was statically declared and not bound to a specialization constant)
+    // ....... [1] Constant id to use for local_size_y
+    // ....... [2] Constant id to use for local_size_z
+    //
+    // NOTE: Halide's Vulkan runtime consumes this header prior to compiling.
+    //
+    // Both vk_decode_shader_bindings() and vk_compile_shader_module() will
+    // need to be updated if the header encoding ever changes!
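+    //
+    // For illustration, a reader might walk the fixed-size portion of this
+    // layout roughly as follows (hypothetical sketch only; the authoritative
+    // decoder is vk_decode_shader_bindings() in the runtime):
+    //
+    //   const uint32_t *word = header;
+    //   uint32_t total_words = *word++;          // [0] header word count
+    //   uint32_t num_sets = *word++;             // [1] number of descriptor sets
+    //   for (uint32_t s = 0; s < num_sets; s++) {
+    //       uint32_t name_words = *word++;       // entry point name length (in words)
+    //       const char *name = (const char *)word;  // null-padded string data
+    //       word += name_words;
+    //       uint32_t uniform_buffer_count = *word++;
+    //       uint32_t storage_buffer_count = *word++;
+    //       // ... then specialization constants, shared memory allocations,
+    //       // and the three workgroup size constant ids, as listed above.
+    //   }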
+    //
+    uint32_t index = 0;
+    spirv_header.push_back(descriptor_set_table.size());
+    for (const DescriptorSet &ds : descriptor_set_table) {
+
+        // encode the entry point name into an array of chars (padded to the next word entry)
+        std::vector<char> entry_point_name = encode_header_string(ds.entry_point_name);
+        uint32_t entry_point_name_entries = (uint32_t)(entry_point_name.size() / sizeof(uint32_t));
+
+        debug(2) << "  [" << index << "] "
+                 << "uniform_buffer_count=" << ds.uniform_buffer_count << " "
+                 << "storage_buffer_count=" << ds.storage_buffer_count << " "
+                 << "entry_point_name_size=" << entry_point_name.size() << " "
+                 << "entry_point_name: " << (const char *)entry_point_name.data() << "\n";
+
+        // [0] Length of entry point name (padded to nearest word size)
+        spirv_header.push_back(entry_point_name_entries);
+
+        // [*] Entry point string data (padded with null chars)
+        spirv_header.insert(spirv_header.end(), (const uint32_t *)entry_point_name.data(), (const uint32_t *)(entry_point_name.data() + entry_point_name.size()));
+
+        // [1] Number of uniform buffers for this descriptor set
+        spirv_header.push_back(ds.uniform_buffer_count);
+
+        // [2] Number of storage buffers for this descriptor set
+        spirv_header.push_back(ds.storage_buffer_count);
+
+        // [3] Number of specialization constants for this descriptor set
+        spirv_header.push_back((uint32_t)ds.specialization_constants.size());
+        debug(2) << "  specialization_count=" << (uint32_t)ds.specialization_constants.size() << "\n";
+
+        // For each specialization constant ...
+        for (const SpecializationBinding &spec_binding : ds.specialization_constants) {
+
+            // encode the constant name into an array of chars (padded to the next word entry)
+            std::vector<char> constant_name = encode_header_string(spec_binding.constant_name);
+            uint32_t constant_name_entries = (uint32_t)(constant_name.size() / sizeof(uint32_t));
+
+            debug(2) << "   [" << spec_binding.constant_id << "] "
+                     << "constant_name=" << (const char *)constant_name.data() << " "
+                     << "type_size=" << spec_binding.type_size << "\n";
+
+            // [0] Length of constant name string (padded to nearest word size)
+            spirv_header.push_back(constant_name_entries);
+
+            // [*] Constant name string data (padded with null chars)
+            spirv_header.insert(spirv_header.end(), (const uint32_t *)constant_name.data(), (const uint32_t *)(constant_name.data() + constant_name.size()));
+
+            // [1] Constant id (as used in VkSpecializationMapEntry for binding)
+            spirv_header.push_back(spec_binding.constant_id);
+
+            // [2] Size of data type (in bytes)
+            spirv_header.push_back(spec_binding.type_size);
+        }
+
+        // [4] Number of shared memory allocations for this descriptor set
+        spirv_header.push_back((uint32_t)ds.shared_memory_usage.size());
+        debug(2) << "  shared_memory_allocations=" << (uint32_t)ds.shared_memory_usage.size() << "\n";
+
+        // For each allocation ...
+        uint32_t shm_index = 0;
+        for (const SharedMemoryAllocation &shared_mem_alloc : ds.shared_memory_usage) {
+
+            // encode the variable name into an array of chars (padded to the next word entry)
+            std::vector<char> variable_name = encode_header_string(shared_mem_alloc.variable_name);
+            uint32_t variable_name_entries = (uint32_t)(variable_name.size() / sizeof(uint32_t));
+
+            debug(2) << "   [" << shm_index++ << "] "
+                     << "variable_name=" << (const char *)variable_name.data() << " "
+                     << "constant_id=" << shared_mem_alloc.constant_id << " "
+                     << "type_size=" << shared_mem_alloc.type_size << " "
+                     << "array_size=" << shared_mem_alloc.array_size << "\n";
+
+            // [0] Length of variable name string (padded to nearest word size)
+            spirv_header.push_back(variable_name_entries);
+
+            // [*] Variable name string data (padded with null chars)
+            spirv_header.insert(spirv_header.end(), (const uint32_t *)variable_name.data(), (const uint32_t *)(variable_name.data() + variable_name.size()));
+
+            // [1] Constant id to use for overriding array size (zero if it is not bound to a specialization constant)
+            spirv_header.push_back(shared_mem_alloc.constant_id);
+
+            // [2] Size of data type (in bytes)
+            spirv_header.push_back(shared_mem_alloc.type_size);
+
+            // [3] Size of array (i.e. element count)
+            spirv_header.push_back(shared_mem_alloc.array_size);
+        }
+
+        // [5] Dynamic workgroup dimensions bound to specialization constants
+        spirv_header.push_back(ds.workgroup_size_binding.local_size_constant_id[0]);
+        spirv_header.push_back(ds.workgroup_size_binding.local_size_constant_id[1]);
+        spirv_header.push_back(ds.workgroup_size_binding.local_size_constant_id[2]);
+        ++index;
+    }
+    uint32_t header_word_count = spirv_header.size();
+    spirv_header.insert(spirv_header.begin(), header_word_count + 1);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::reset_workgroup_size() {
+    workgroup_size[0] = 0;
+    workgroup_size[1] = 0;
+    workgroup_size[2] = 0;
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::find_workgroup_size(const Stmt &s) {
+    reset_workgroup_size();
+    FindWorkGroupSize fwgs;
+    s.accept(&fwgs);
+
+    workgroup_size[0] = fwgs.workgroup_size[0];
+    workgroup_size[1] = fwgs.workgroup_size[1];
+    workgroup_size[2] = fwgs.workgroup_size[2];
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_workgroup_size(SpvId kernel_func_id) {
+
+    if (workgroup_size[0] == 0) {
+
+        // workgroup size is dynamic ...
+        if (!target.has_feature(Target::VulkanV13)) {
+            user_error << "Vulkan: Dynamic workgroup sizes require Vulkan v1.3+ support!
" + << "Either enable the target feature, or adjust the pipeline's schedule " + << "to use static workgroup sizes!"; + } + + // declare the workgroup local size as a specialization constant (which will get overridden at runtime) + Type local_size_type = UInt(32); + + uint32_t local_size_x = std::max(workgroup_size[0], (uint32_t)1); // use a minimum of 1 for the default value + uint32_t local_size_y = std::max(workgroup_size[1], (uint32_t)1); + uint32_t local_size_z = std::max(workgroup_size[2], (uint32_t)1); + + SpvId local_size_x_id = builder.declare_specialization_constant(local_size_type, &local_size_x); + SpvId local_size_y_id = builder.declare_specialization_constant(local_size_type, &local_size_y); + SpvId local_size_z_id = builder.declare_specialization_constant(local_size_type, &local_size_z); + + SpvId local_size_ids[3] = { + local_size_x_id, + local_size_y_id, + local_size_z_id}; + + const char *local_size_names[3] = { + "__thread_id_x", + "__thread_id_y", + "__thread_id_z"}; + + debug(1) << "Vulkan: Using dynamic workgroup local size with default of [" << local_size_x << ", " << local_size_y << ", " << local_size_z << "]...\n"; + + // annotate each local size with a corresponding specialization constant + for (uint32_t dim = 0; dim < 3; dim++) { + SpvId constant_id = (uint32_t)(descriptor_set_table.back().specialization_constants.size() + 1); + SpvBuilder::Literals spec_id = {constant_id}; + builder.add_annotation(local_size_ids[dim], SpvDecorationSpecId, spec_id); + builder.add_symbol(local_size_names[dim], local_size_ids[dim], builder.current_module().id()); + SpecializationBinding spec_binding = {constant_id, (uint32_t)sizeof(uint32_t), local_size_names[dim]}; + descriptor_set_table.back().specialization_constants.push_back(spec_binding); + descriptor_set_table.back().workgroup_size_binding.local_size_constant_id[dim] = constant_id; + } + + // Add workgroup size to execution mode + SpvInstruction exec_mode_inst = SpvFactory::exec_mode_local_size_id(kernel_func_id, local_size_x_id, local_size_y_id, local_size_z_id); + builder.current_module().add_execution_mode(exec_mode_inst); + + } else { + + // workgroup size is static ... 
+        workgroup_size[0] = std::max(workgroup_size[0], (uint32_t)1);
+        workgroup_size[1] = std::max(workgroup_size[1], (uint32_t)1);
+        workgroup_size[2] = std::max(workgroup_size[2], (uint32_t)1);
+
+        debug(1) << "Vulkan: Using static workgroup local size [" << workgroup_size[0] << ", " << workgroup_size[1] << ", " << workgroup_size[2] << "]...\n";
+
+        // Add workgroup size to execution mode
+        SpvInstruction exec_mode_inst = SpvFactory::exec_mode_local_size(kernel_func_id, workgroup_size[0], workgroup_size[1], workgroup_size[2]);
+        builder.current_module().add_execution_mode(exec_mode_inst);
+    }
+}
+
+namespace {
+
+// Locate all the unique GPU variables used as SIMT intrinsics
+class FindIntrinsicsUsed : public IRVisitor {
+    using IRVisitor::visit;
+    void visit(const For *op) override {
+        if (CodeGen_GPU_Dev::is_gpu_var(op->name)) {
+            auto intrinsic = simt_intrinsic(op->name);
+            intrinsics_used.insert(intrinsic.first);
+        }
+        op->body.accept(this);
+    }
+    void visit(const Variable *op) override {
+        if (CodeGen_GPU_Dev::is_gpu_var(op->name)) {
+            auto intrinsic = simt_intrinsic(op->name);
+            intrinsics_used.insert(intrinsic.first);
+        }
+    }
+
+public:
+    std::unordered_set<std::string> intrinsics_used;
+    FindIntrinsicsUsed() = default;
+};
+
+// Map the SPIR-V builtin intrinsic name to its corresponding enum value
+SpvBuiltIn map_simt_builtin(const std::string &intrinsic_name) {
+    if (starts_with(intrinsic_name, "Workgroup")) {
+        return SpvBuiltInWorkgroupId;
+    } else if (starts_with(intrinsic_name, "Local")) {
+        return SpvBuiltInLocalInvocationId;
+    }
+    internal_error << "map_simt_builtin called on bad variable name: " << intrinsic_name << "\n";
+    return SpvBuiltInMax;
+}
+
+} // namespace
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_entry_point(const Stmt &s, SpvId kernel_func_id) {
+
+    // Locate all simt intrinsics
+    FindIntrinsicsUsed find_intrinsics;
+    s.accept(&find_intrinsics);
+
+    SpvFactory::Variables entry_point_variables;
+    for (const std::string &intrinsic_name : find_intrinsics.intrinsics_used) {
+
+        // The builtins are pointers to vec3
+        SpvStorageClass storage_class = SpvStorageClassInput;
+        SpvId intrinsic_type_id = builder.declare_type(Type(Type::UInt, 32, 3));
+        SpvId intrinsic_ptr_type_id = builder.declare_pointer_type(intrinsic_type_id, storage_class);
+        const std::string intrinsic_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + intrinsic_name;
+        SpvId intrinsic_var_id = builder.declare_global_variable(intrinsic_var_name, intrinsic_ptr_type_id, storage_class);
+        SpvId intrinsic_loaded_id = builder.reserve_id();
+        builder.append(SpvFactory::load(intrinsic_type_id, intrinsic_loaded_id, intrinsic_var_id));
+        symbol_table.push(intrinsic_var_name, {intrinsic_loaded_id, storage_class});
+
+        // Annotate that this is the specific builtin
+        SpvBuiltIn built_in_kind = map_simt_builtin(intrinsic_name);
+        SpvBuilder::Literals annotation_literals = {(uint32_t)built_in_kind};
+        builder.add_annotation(intrinsic_var_id, SpvDecorationBuiltIn, annotation_literals);
+
+        // Add the builtin to the interface
+        entry_point_variables.push_back(intrinsic_var_id);
+    }
+
+    // Add the entry point with the appropriate execution model
+    // NOTE: exec_model must be GLCompute to work with Vulkan ... Kernel is only supported in OpenCL
+    //       Kernel is only supported in OpenCL
+    builder.add_entry_point(kernel_func_id, SpvExecutionModelGLCompute, entry_point_variables);
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::declare_device_args(const Stmt &s, uint32_t entry_point_index,
+                                                            const std::string &entry_point_name,
+                                                            const std::vector<DeviceArgument> &args) {
+
+    // Keep track of the descriptor set needed to bind this kernel's inputs / outputs
+    DescriptorSet descriptor_set;
+    descriptor_set.entry_point_name = entry_point_name;
+
+    // Add required extension support for storage types which are necessary to
+    // use smaller bit-width types for any halide buffer *or* device argument
+    // (passed as a runtime array)
+    for (const auto &arg : args) {
+        if (arg.type.is_int_or_uint()) {
+            if (arg.type.bits() == 8) {
+                builder.require_extension("SPV_KHR_8bit_storage");
+            } else if (arg.type.bits() == 16) {
+                builder.require_extension("SPV_KHR_16bit_storage");
+            }
+        }
+    }
+
+    // GLSL-style: each input buffer is a runtime array in a buffer struct
+    // All other params get passed in as a single uniform block
+    // First, need to count scalar parameters to construct the uniform struct
+    SpvBuilder::StructMemberTypes param_struct_members;
+    for (const auto &arg : args) {
+        if (!arg.is_buffer) {
+            // Add required access capability for smaller bit-width types used as runtime arrays
+            if (arg.type.bits() == 8) {
+                builder.require_capability(SpvCapabilityUniformAndStorageBuffer8BitAccess);
+            } else if (arg.type.bits() == 16) {
+                builder.require_capability(SpvCapabilityUniformAndStorageBuffer16BitAccess);
+            }
+
+            SpvId arg_type_id = builder.declare_type(arg.type);
+            param_struct_members.push_back(arg_type_id);
+        }
+    }
+
+    // Add a binding for a uniform buffer packed with all scalar args
+    uint32_t binding_counter = 0;
+    if (!param_struct_members.empty()) {
+
+        const std::string struct_name = std::string("k") + std::to_string(kernel_index) + std::string("_args_struct");
+        SpvId param_struct_type_id = builder.declare_struct(struct_name, param_struct_members);
+
+        // Add a decoration describing the offset for each parameter struct member
+        uint32_t param_member_index = 0;
+        uint32_t param_member_offset = 0;
+        for (const auto &arg : args) {
+            if (!arg.is_buffer) {
+                SpvBuilder::Literals param_offset_literals = {param_member_offset};
+                builder.add_struct_annotation(param_struct_type_id, param_member_index, SpvDecorationOffset, param_offset_literals);
+                param_member_offset += arg.type.bytes();
+                param_member_index++;
+            }
+        }
+
+        // Add a Block decoration for the parameter pack itself
+        builder.add_annotation(param_struct_type_id, SpvDecorationBlock);
+
+        // Add a variable for the parameter pack
+        const std::string param_pack_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_args_var");
+        SpvId param_pack_ptr_type_id = builder.declare_pointer_type(param_struct_type_id, SpvStorageClassUniform);
+        SpvId param_pack_var_id = builder.declare_global_variable(param_pack_var_name, param_pack_ptr_type_id, SpvStorageClassUniform);
+
+        // We always pass in the parameter pack as the first binding
+        SpvBuilder::Literals binding_index = {0};
+        SpvBuilder::Literals dset_index = {entry_point_index};
+        builder.add_annotation(param_pack_var_id, SpvDecorationDescriptorSet, dset_index);
+        builder.add_annotation(param_pack_var_id, SpvDecorationBinding, binding_index);
+        descriptor_set.uniform_buffer_count++;
+        binding_counter++;
+
+        // Declare all the args with appropriate offsets into the parameter struct
+        uint32_t scalar_index = 0;
+        for (const auto &arg : args) {
+ if (!arg.is_buffer) { + + SpvId arg_type_id = builder.declare_type(arg.type); + SpvId access_index_id = builder.declare_constant(UInt(32), &scalar_index); + SpvId pointer_type_id = builder.declare_pointer_type(arg_type_id, SpvStorageClassUniform); + SpvFactory::Indices access_indices = {access_index_id}; + SpvId access_chain_id = builder.declare_access_chain(pointer_type_id, param_pack_var_id, access_indices); + scalar_index++; + + SpvId param_id = builder.reserve_id(SpvResultId); + builder.append(SpvFactory::load(arg_type_id, param_id, access_chain_id)); + symbol_table.push(arg.name, {param_id, SpvStorageClassUniform}); + } + } + } + + // Add bindings for all device buffers declared as GLSL-style buffer blocks in uniform storage + for (const auto &arg : args) { + if (arg.is_buffer) { + + // Check for dense loads & stores to determine the widest vector + // width we can safely index + CheckAlignedDenseVectorLoadStore check_dense(arg.name); + s.accept(&check_dense); + int lanes = check_dense.are_all_dense ? check_dense.lanes : 1; + + // Declare the runtime array (which maps directly to the Halide device buffer) + Type array_element_type = arg.type.with_lanes(lanes); + SpvId array_element_type_id = builder.declare_type(array_element_type); + SpvId runtime_arr_type_id = builder.add_runtime_array(array_element_type_id); + + // Annotate the array with its stride + SpvBuilder::Literals array_stride = {(uint32_t)(arg.type.bytes())}; + builder.add_annotation(runtime_arr_type_id, SpvDecorationArrayStride, array_stride); + + // Wrap the runtime array in a struct (required with SPIR-V buffer block semantics) + SpvBuilder::StructMemberTypes struct_member_types = {runtime_arr_type_id}; + const std::string struct_name = std::string("k") + std::to_string(kernel_index) + std::string("_buffer_block") + std::to_string(binding_counter); + SpvId struct_type_id = builder.declare_struct(struct_name, struct_member_types); + + // Declare a pointer to the struct as a global variable + SpvStorageClass storage_class = SpvStorageClassUniform; + SpvId ptr_struct_type_id = builder.declare_pointer_type(struct_type_id, storage_class); + const std::string buffer_block_var_name = std::string("k") + std::to_string(kernel_index) + std::string("_") + arg.name; + SpvId buffer_block_var_id = builder.declare_global_variable(buffer_block_var_name, ptr_struct_type_id, storage_class); + + // Annotate the struct to indicate it's passed in a GLSL-style buffer block + builder.add_annotation(struct_type_id, SpvDecorationBufferBlock); + + // Annotate the offset for the array + SpvBuilder::Literals zero_literal = {uint32_t(0)}; + builder.add_struct_annotation(struct_type_id, 0, SpvDecorationOffset, zero_literal); + + // Set descriptor set and binding indices + SpvBuilder::Literals dset_index = {entry_point_index}; + SpvBuilder::Literals binding_index = {uint32_t(binding_counter++)}; + builder.add_annotation(buffer_block_var_id, SpvDecorationDescriptorSet, dset_index); + builder.add_annotation(buffer_block_var_id, SpvDecorationBinding, binding_index); + symbol_table.push(arg.name, {buffer_block_var_id, storage_class}); + + StorageAccess access; + access.storage_type_id = array_element_type_id; + access.storage_type = array_element_type; + access.storage_class = storage_class; + storage_access_map[buffer_block_var_id] = access; + descriptor_set.storage_buffer_count++; + } + } + + // Save the descriptor set (so we can output the binding information as a header to the code module) + descriptor_set_table.push_back(descriptor_set); +} + 
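+// For example (a hypothetical kernel, not generated here): two scalar params
+// and one buffer argument yield binding 0 for the packed scalar args (a
+// uniform buffer) and binding 1 for the buffer block (a storage buffer), so
+// a Vulkan runtime would pair the kernel with a descriptor set layout along
+// these lines:
+//
+//     VkDescriptorSetLayoutBinding bindings[2] = {
+//         {0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+//         {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT, nullptr},
+//     };
+//
+// The uniform_buffer_count / storage_buffer_count totals recorded in the
+// DescriptorSet above give the runtime the counts it needs to size its
+// descriptor pool (e.g. VkDescriptorPoolSize entries).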
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::compile(std::vector<char> &module) {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::compile\n";
+
+    // First encode the descriptor set bindings for each entry point
+    // as a sidecar which we will add as a preamble header to the actual
+    // SPIR-V binary so the runtime can know which descriptor set to use
+    // for each entry point
+    SpvBinary spirv_header;
+    encode_header(spirv_header);
+
+    // Finalize the SPIR-V module
+    builder.finalize();
+
+    // Validate the SPIR-V for the target
+    if (builder.is_capability_required(SpvCapabilityInt8) && !target.has_feature(Target::VulkanInt8)) {
+        user_error << "Vulkan: Code requires 8-bit integer support (which is not enabled in the target features)! "
+                   << "Either enable the target feature, or adjust the algorithm to avoid using this data type!";
+    }
+
+    if (builder.is_capability_required(SpvCapabilityInt16) && !target.has_feature(Target::VulkanInt16)) {
+        user_error << "Vulkan: Code requires 16-bit integer support (which is not enabled in the target features)! "
+                   << "Either enable the target feature, or adjust the algorithm to avoid using this data type!";
+    }
+
+    if (builder.is_capability_required(SpvCapabilityInt64) && !target.has_feature(Target::VulkanInt64)) {
+        user_error << "Vulkan: Code requires 64-bit integer support (which is not enabled in the target features)! "
+                   << "Either enable the target feature, or adjust the algorithm to avoid using this data type!";
+    }
+
+    if (builder.is_capability_required(SpvCapabilityFloat16) && !target.has_feature(Target::VulkanFloat16)) {
+        user_error << "Vulkan: Code requires 16-bit floating-point support (which is not enabled in the target features)! "
+                   << "Either enable the target feature, or adjust the algorithm to avoid using this data type!";
+    }
+
+    if (builder.is_capability_required(SpvCapabilityFloat64) && !target.has_feature(Target::VulkanFloat64)) {
+        user_error << "Vulkan: Code requires 64-bit floating-point support (which is not enabled in the target features)! "
+                   << "Either enable the target feature, or adjust the algorithm to avoid using this data type!";
+    }
+
+    // Encode the SPIR-V into a compliant binary
+    SpvBinary spirv_binary;
+    builder.encode(spirv_binary);
+
+    size_t header_bytes = spirv_header.size() * sizeof(uint32_t);
+    size_t binary_bytes = spirv_binary.size() * sizeof(uint32_t);
+
+    debug(2) << "  encoding module ("
+             << "header_size: " << (uint32_t)(header_bytes) << ", "
+             << "binary_size: " << (uint32_t)(binary_bytes) << ")\n";
+
+    // Combine the header and binary into the module
+    module.reserve(header_bytes + binary_bytes);
+    module.insert(module.end(), (const char *)spirv_header.data(), (const char *)(spirv_header.data() + spirv_header.size()));
+    module.insert(module.end(), (const char *)spirv_binary.data(), (const char *)(spirv_binary.data() + spirv_binary.size()));
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::add_kernel(const Stmt &s,
+                                                   const std::string &kernel_name,
+                                                   const std::vector<DeviceArgument> &args) {
+    debug(2) << "Adding Vulkan kernel " << kernel_name << "\n";
+
+    // Add function definition
+    // TODO: can we use one of the function control annotations?
+    // https://github.com/halide/Halide/issues/7533
+
+    // Discover the workgroup size
+    find_workgroup_size(s);
+
+    // Update the kernel index for the module
+    kernel_index++;
+
+    // Declare the kernel function
+    SpvId void_type_id = builder.declare_void_type();
+    SpvId kernel_func_id = builder.add_function(kernel_name, void_type_id);
+    SpvFunction kernel_func = builder.lookup_function(kernel_func_id);
+    uint32_t entry_point_index = builder.current_module().entry_point_count();
+    builder.enter_function(kernel_func);
+
+    // Declare the entry point and input intrinsics for the kernel func
+    declare_entry_point(s, kernel_func_id);
+
+    // Declare all parameters -- scalar args and device buffers
+    declare_device_args(s, entry_point_index, kernel_name, args);
+
+    // Traverse
+    s.accept(this);
+
+    // Insert return statement end delimiter
+    kernel_func.tail_block().add_instruction(SpvFactory::return_stmt());
+
+    // Declare the workgroup size for the kernel
+    declare_workgroup_size(kernel_func_id);
+
+    // Pop scope
+    for (const auto &arg : args) {
+        symbol_table.pop(arg.name);
+    }
+    builder.leave_block();
+    builder.leave_function();
+    storage_access_map.clear();
+}
+
+void CodeGen_Vulkan_Dev::SPIRV_Emitter::dump() const {
+    debug(2) << "CodeGen_Vulkan_Dev::SPIRV_Emitter::dump()\n";
+    std::cerr << builder.current_module();
+}
+
+CodeGen_Vulkan_Dev::CodeGen_Vulkan_Dev(Target t)
+    : emitter(t) {
+    // Empty
+}
+
+void CodeGen_Vulkan_Dev::init_module() {
+    debug(2) << "CodeGen_Vulkan_Dev::init_module\n";
+    emitter.init_module();
+}
+
+void CodeGen_Vulkan_Dev::add_kernel(Stmt stmt,
+                                    const std::string &name,
+                                    const std::vector<DeviceArgument> &args) {
+
+    debug(2) << "CodeGen_Vulkan_Dev::add_kernel " << name << "\n";
+
+    // We need to scalarize/de-predicate any loads/stores, since Vulkan does not support predication.
+    stmt = scalarize_predicated_loads_stores(stmt);
+
+    debug(2) << "CodeGen_Vulkan_Dev: after removing predication: \n"
+             << stmt;
+
+    current_kernel_name = name;
+    emitter.add_kernel(stmt, name, args);
+
+    // dump the SPIRV file if requested
+    if (getenv("HL_SPIRV_DUMP_FILE")) {
+        dump();
+    }
+}
+
+std::vector<char> CodeGen_Vulkan_Dev::compile_to_src() {
+    debug(2) << "CodeGen_Vulkan_Dev::compile_to_src\n";
+    std::vector<char> module;
+    emitter.compile(module);
+    return module;
+}
+
+std::string CodeGen_Vulkan_Dev::get_current_kernel_name() {
+    return current_kernel_name;
+}
+
+std::string CodeGen_Vulkan_Dev::print_gpu_name(const std::string &name) {
+    return name;
+}
+
+void CodeGen_Vulkan_Dev::dump() {
+    std::vector<char> module = compile_to_src();
+
+    // Print the contents of the compiled SPIR-V module
+    emitter.dump();
+
+    // Skip the header and only output the SPIR-V binary
+    const uint32_t *decode = (const uint32_t *)(module.data());
+    uint32_t header_word_count = decode[0];
+    size_t header_size = header_word_count * sizeof(uint32_t);
+    const uint32_t *binary_ptr = (decode + header_word_count);
+    size_t binary_size = (module.size() - header_size);
+
+    const char *filename = getenv("HL_SPIRV_DUMP_FILE") ?
getenv("HL_SPIRV_DUMP_FILE") : "out.spv"; + debug(1) << "Vulkan: Dumping SPIRV module to file: '" << filename << "'\n"; + std::ofstream f(filename, std::ios::out | std::ios::binary); + f.write((const char *)(binary_ptr), binary_size); + f.close(); +} + +} // namespace + +std::unique_ptr new_CodeGen_Vulkan_Dev(const Target &target) { + return std::make_unique(target); +} + +} // namespace Internal +} // namespace Halide + +#else // WITH_SPIRV + +namespace Halide { +namespace Internal { + +std::unique_ptr new_CodeGen_Vulkan_Dev(const Target &target) { + return nullptr; +} + +} // namespace Internal +} // namespace Halide + +#endif // WITH_SPIRV diff --git a/src/CodeGen_Vulkan_Dev.h b/src/CodeGen_Vulkan_Dev.h new file mode 100644 index 000000000000..d4830309e653 --- /dev/null +++ b/src/CodeGen_Vulkan_Dev.h @@ -0,0 +1,24 @@ +#ifndef HALIDE_CODEGEN_VULKAN_DEV_H +#define HALIDE_CODEGEN_VULKAN_DEV_H + +/** \file + * Defines the code-generator for producing SPIR-V binary modules for + * use with the Vulkan runtime + */ + +#include + +namespace Halide { + +struct Target; + +namespace Internal { + +struct CodeGen_GPU_Dev; + +std::unique_ptr new_CodeGen_Vulkan_Dev(const Target &target); + +} // namespace Internal +} // namespace Halide + +#endif diff --git a/src/DeviceAPI.h b/src/DeviceAPI.h index b4d5b72f827f..1f67aaf7b048 100644 --- a/src/DeviceAPI.h +++ b/src/DeviceAPI.h @@ -23,6 +23,7 @@ enum class DeviceAPI { Hexagon, HexagonDma, D3D12Compute, + Vulkan, WebGPU, }; @@ -38,6 +39,7 @@ const DeviceAPI all_device_apis[] = {DeviceAPI::None, DeviceAPI::Hexagon, DeviceAPI::HexagonDma, DeviceAPI::D3D12Compute, + DeviceAPI::Vulkan, DeviceAPI::WebGPU}; } // namespace Halide diff --git a/src/DeviceInterface.cpp b/src/DeviceInterface.cpp index ae8d2175ae63..9a0cb2f97e99 100644 --- a/src/DeviceInterface.cpp +++ b/src/DeviceInterface.cpp @@ -102,6 +102,8 @@ const halide_device_interface_t *get_device_interface_for_device_api(DeviceAPI d name = "hexagon_dma"; } else if (d == DeviceAPI::D3D12Compute) { name = "d3d12compute"; + } else if (d == DeviceAPI::Vulkan) { + name = "vulkan"; } else if (d == DeviceAPI::WebGPU) { name = "webgpu"; } else { @@ -160,6 +162,8 @@ DeviceAPI get_default_device_api_for_target(const Target &target) { return DeviceAPI::HexagonDma; } else if (target.has_feature(Target::D3D12Compute)) { return DeviceAPI::D3D12Compute; + } else if (target.has_feature(Target::Vulkan)) { + return DeviceAPI::Vulkan; } else if (target.has_feature(Target::WebGPU)) { return DeviceAPI::WebGPU; } else { @@ -200,6 +204,9 @@ Expr make_device_interface_call(DeviceAPI device_api, MemoryType memory_type) { case DeviceAPI::D3D12Compute: interface_name = "halide_d3d12compute_device_interface"; break; + case DeviceAPI::Vulkan: + interface_name = "halide_vulkan_device_interface"; + break; case DeviceAPI::WebGPU: interface_name = "halide_webgpu_device_interface"; break; diff --git a/src/FuseGPUThreadLoops.cpp b/src/FuseGPUThreadLoops.cpp index 6a7ccaba9fa2..3c678b5bd693 100644 --- a/src/FuseGPUThreadLoops.cpp +++ b/src/FuseGPUThreadLoops.cpp @@ -759,7 +759,7 @@ class ExtractSharedAndHeapAllocations : public IRMutator { // lifetimes, and then cluster the groups according to which // ones can share a single allocation. For cuda, opencl, and // similar we get one big combined allocation per memory - // type. For openglcompute and direct3d, we also separate by + // type. For vulkan, openglcompute and direct3d, we also separate by // element type. 
map, vector> clustered_allocs; @@ -1029,6 +1029,7 @@ class ExtractSharedAndHeapAllocations : public IRMutator { num_threads_var_name(unique_name('t')), may_merge_allocs_of_different_type(device_api != DeviceAPI::OpenGLCompute && device_api != DeviceAPI::D3D12Compute && + device_api != DeviceAPI::Vulkan && device_api != DeviceAPI::WebGPU) { } }; // namespace Internal @@ -1490,7 +1491,8 @@ class ZeroGPULoopMins : public IRMutator { in_non_glsl_gpu = (in_non_glsl_gpu && op->device_api == DeviceAPI::None) || (op->device_api == DeviceAPI::CUDA) || (op->device_api == DeviceAPI::OpenCL) || (op->device_api == DeviceAPI::Metal) || - (op->device_api == DeviceAPI::D3D12Compute); + (op->device_api == DeviceAPI::D3D12Compute) || + (op->device_api == DeviceAPI::Vulkan); Stmt stmt = IRMutator::visit(op); if (CodeGen_GPU_Dev::is_gpu_var(op->name) && !is_const_zero(op->min)) { diff --git a/src/IRPrinter.cpp b/src/IRPrinter.cpp index 8cab6d61aee5..0d746e034483 100644 --- a/src/IRPrinter.cpp +++ b/src/IRPrinter.cpp @@ -114,6 +114,9 @@ ostream &operator<<(ostream &out, const DeviceAPI &api) { case DeviceAPI::D3D12Compute: out << ""; break; + case DeviceAPI::Vulkan: + out << ""; + break; case DeviceAPI::WebGPU: out << ""; break; diff --git a/src/JITModule.cpp b/src/JITModule.cpp index 58fdf280e663..0a37e20d6ff8 100644 --- a/src/JITModule.cpp +++ b/src/JITModule.cpp @@ -115,6 +115,27 @@ void load_metal() { #endif } +void load_vulkan() { + if (have_symbol("vkGetInstanceProcAddr")) { + debug(1) << "Vulkan support code already linked in...\n"; + } else { + debug(1) << "Looking for Vulkan support code...\n"; + string error; +#if defined(__linux__) + llvm::sys::DynamicLibrary::LoadLibraryPermanently("libvulkan.so.1", &error); + user_assert(error.empty()) << "Could not find libvulkan.so.1\n"; +#elif defined(__APPLE__) + llvm::sys::DynamicLibrary::LoadLibraryPermanently("libvulkan.1.dylib", &error); + user_assert(error.empty()) << "Could not find libvulkan.1.dylib\n"; +#elif defined(_WIN32) + llvm::sys::DynamicLibrary::LoadLibraryPermanently("vulkan-1.dll", &error); + user_assert(error.empty()) << "Could not find vulkan-1.dll\n"; +#else + internal_error << "JIT support for Vulkan only available on Linux, OS X and Windows!\n"; +#endif + } +} + void load_webgpu() { debug(1) << "Looking for a native WebGPU implementation...\n"; const char *libnames[] = { @@ -736,6 +757,7 @@ enum RuntimeKind { OpenGLCompute, Hexagon, D3D12Compute, + Vulkan, WebGPU, OpenCLDebug, MetalDebug, @@ -743,6 +765,7 @@ enum RuntimeKind { OpenGLComputeDebug, HexagonDebug, D3D12ComputeDebug, + VulkanDebug, WebGPUDebug, MaxRuntimeKind }; @@ -779,6 +802,7 @@ JITModule &make_module(llvm::Module *for_module, Target target, one_gpu.set_feature(Target::HVX, false); one_gpu.set_feature(Target::OpenGLCompute, false); one_gpu.set_feature(Target::D3D12Compute, false); + one_gpu.set_feature(Target::Vulkan, false); one_gpu.set_feature(Target::WebGPU, false); string module_name; switch (runtime_kind) { @@ -843,6 +867,17 @@ JITModule &make_module(llvm::Module *for_module, Target target, internal_error << "JIT support for Direct3D 12 is only implemented on Windows 10 and above.\n"; #endif break; + case VulkanDebug: + one_gpu.set_feature(Target::Debug); + one_gpu.set_feature(Target::Vulkan); + load_vulkan(); + module_name = "debug_vulkan"; + break; + case Vulkan: + one_gpu.set_feature(Target::Vulkan); + load_vulkan(); + module_name += "vulkan"; + break; case WebGPUDebug: one_gpu.set_feature(Target::Debug); one_gpu.set_feature(Target::WebGPU); @@ -1047,6 +1082,13 @@ 
std::vector JITSharedRuntime::get(llvm::Module *for_module, const Tar result.push_back(m); } } + if (target.has_feature(Target::Vulkan)) { + auto kind = target.has_feature(Target::Debug) ? VulkanDebug : Vulkan; + JITModule m = make_module(for_module, target, kind, result, create); + if (m.compiled()) { + result.push_back(m); + } + } if (target.has_feature(Target::WebGPU)) { auto kind = target.has_feature(Target::Debug) ? WebGPUDebug : WebGPU; JITModule m = make_module(for_module, target, kind, result, create); @@ -1054,7 +1096,6 @@ std::vector JITSharedRuntime::get(llvm::Module *for_module, const Tar result.push_back(m); } } - return result; } diff --git a/src/LLVM_Runtime_Linker.cpp b/src/LLVM_Runtime_Linker.cpp index 61ac31487a78..ff2f64986b27 100644 --- a/src/LLVM_Runtime_Linker.cpp +++ b/src/LLVM_Runtime_Linker.cpp @@ -235,6 +235,14 @@ DECLARE_CPP_INITMOD_LOOKUP(windows_d3d12compute_arm) DECLARE_NO_INITMOD(windows_d3d12compute_arm) #endif // WITH_D3D12 +#ifdef WITH_VULKAN +DECLARE_CPP_INITMOD(vulkan) +DECLARE_CPP_INITMOD(windows_vulkan) +#else +DECLARE_NO_INITMOD(vulkan) +DECLARE_NO_INITMOD(windows_vulkan) +#endif // WITH_VULKAN + #ifdef WITH_X86 DECLARE_LL_INITMOD(x86_amx) DECLARE_LL_INITMOD(x86_avx512) @@ -1199,6 +1207,13 @@ std::unique_ptr get_initial_module_for_target(Target t, llvm::LLVM user_error << "Direct3D 12 can only be used on ARM or X86 architectures.\n"; } } + if (t.has_feature(Target::Vulkan)) { + if (t.os == Target::Windows) { + modules.push_back(get_initmod_windows_vulkan(c, bits_64, debug)); + } else { + modules.push_back(get_initmod_vulkan(c, bits_64, debug)); + } + } if (t.has_feature(Target::WebGPU)) { if (t.os == Target::Windows) { // TOOD: Test on Windows and enable this. diff --git a/src/Lower.cpp b/src/Lower.cpp index 6fa07736b352..ecff36ee58b4 100644 --- a/src/Lower.cpp +++ b/src/Lower.cpp @@ -253,6 +253,7 @@ void lower_impl(const vector &output_funcs, // OpenGL relies on GPU var canonicalization occurring before // storage flattening. 
if (t.has_gpu_feature() || + t.has_feature(Target::Vulkan) || t.has_feature(Target::OpenGLCompute)) { debug(1) << "Canonicalizing GPU var names...\n"; s = canonicalize_gpu_vars(s); @@ -321,6 +322,7 @@ void lower_impl(const vector &output_funcs, log("Lowering after vectorizing:", s); if (t.has_gpu_feature() || + t.has_feature(Target::Vulkan) || t.has_feature(Target::OpenGLCompute)) { debug(1) << "Injecting per-block gpu synchronization...\n"; s = fuse_gpu_thread_loops(s); diff --git a/src/OffloadGPULoops.cpp b/src/OffloadGPULoops.cpp index 3ab9fb8b2c10..7b8464211994 100644 --- a/src/OffloadGPULoops.cpp +++ b/src/OffloadGPULoops.cpp @@ -7,6 +7,7 @@ #include "CodeGen_OpenCL_Dev.h" #include "CodeGen_OpenGLCompute_Dev.h" #include "CodeGen_PTX_Dev.h" +#include "CodeGen_Vulkan_Dev.h" #include "CodeGen_WebGPU_Dev.h" #include "ExprUsesVar.h" #include "IRMutator.h" @@ -275,6 +276,9 @@ class InjectGpuOffload : public IRMutator { if (target.has_feature(Target::D3D12Compute)) { cgdev[DeviceAPI::D3D12Compute] = new_CodeGen_D3D12Compute_Dev(device_target); } + if (target.has_feature(Target::Vulkan)) { + cgdev[DeviceAPI::Vulkan] = new_CodeGen_Vulkan_Dev(target); + } if (target.has_feature(Target::WebGPU)) { cgdev[DeviceAPI::WebGPU] = new_CodeGen_WebGPU_Dev(device_target); } diff --git a/src/SpirvIR.cpp b/src/SpirvIR.cpp index 621e79de7c62..73fdd7f24871 100644 --- a/src/SpirvIR.cpp +++ b/src/SpirvIR.cpp @@ -6,6 +6,140 @@ namespace Halide { namespace Internal { +namespace { + +template +T saturate_value(T val, T min = std::numeric_limits::min(), T max = std::numeric_limits::max()) { + return std::min(std::max(val, min), max); +} + +template +void assign_constant(void *dst, const void *src) { + reinterpret_cast(dst)[0] = saturate_value(reinterpret_cast(src)[0]); +} + +template<> +void assign_constant(void *dst, const void *src) { + reinterpret_cast(dst)[0] = reinterpret_cast(src)[0]; +} + +template<> +void assign_constant(void *dst, const void *src) { + reinterpret_cast(dst)[0] = reinterpret_cast(src)[0]; +} + +template<> +void assign_constant(void *dst, const void *src) { + reinterpret_cast(dst)[0] = reinterpret_cast(src)[0]; +} + +template<> +void assign_constant(void *dst, const void *src) { + reinterpret_cast(dst)[0] = reinterpret_cast(src)[0]; +} + +template +std::string stringify_constant(const T &value) { + return std::string(); +} + +template<> +std::string stringify_constant(const int8_t &value) { + return std::to_string(int8_t(value)); +} + +template<> +std::string stringify_constant(const int16_t &value) { + return std::to_string(int16_t(value)); +} + +template<> +std::string stringify_constant(const int32_t &value) { + return std::to_string(int32_t(value)); +} + +template<> +std::string stringify_constant(const int64_t &value) { + return std::to_string(int64_t(value)); +} + +template<> +std::string stringify_constant(const uint8_t &value) { + return std::to_string(uint8_t(value)); +} + +template<> +std::string stringify_constant(const uint16_t &value) { + return std::to_string(uint16_t(value)); +} + +template<> +std::string stringify_constant(const uint32_t &value) { + return std::to_string(uint32_t(value)); +} + +template<> +std::string stringify_constant(const uint64_t &value) { + return std::to_string(uint64_t(value)); +} + +template<> +std::string stringify_constant(const bfloat16_t &value) { + return std::to_string(float(value)); +} + +template<> +std::string stringify_constant(const float16_t &value) { + return std::to_string(float(value)); +} + +template<> +std::string 
stringify_constant(const float &value) { + return std::to_string(float(value)); +} + +template<> +std::string stringify_constant(const double &value) { + return std::to_string(double(value)); +} + +/** Returns the major version of the SPIR-V header version indicator **/ +inline uint32_t spirv_major_version(uint32_t version) { + return ((version >> 16) & 0xff); +} + +/** Returns the minor version of the SPIR-V header version indicator **/ +inline uint32_t spirv_minor_version(uint32_t version) { + return ((version >> 8) & 0xff); +} + +/** Returns the name string for a given SPIR-V operand **/ +const std::string &spirv_op_name(SpvId op); + +template +T constexpr rotl(const T n, const S i) { + static_assert(std::is_unsigned::value, "rotl only works on unsigned types"); + const T m = (std::numeric_limits::digits - 1); + const T c = i & m; + return (n << c) | (n >> ((T(0) - c) & m)); +} + +inline uint64_t hash_splitmix64(uint64_t x) { + // http://xorshift.di.unimi.it/splitmix64.c + x += uint64_t(0x9e3779b97f4a7c15); + x = (x ^ (x >> 30)) * uint64_t(0xbf58476d1ce4e5b9); + x = (x ^ (x >> 27)) * uint64_t(0x94d049bb133111eb); + return x ^ (x >> 31); +} + +inline uint64_t hash_combine(uint64_t &seed, const uint64_t &value) { + // mix using a cheap asymmetric binary rotation + const uint64_t r = std::numeric_limits::digits / 3; + return rotl(seed, r) ^ hash_splitmix64(value); +} + +} // namespace + /** SpvInstruction implementation **/ SpvInstruction SpvInstruction::make(SpvOp op_code) { SpvInstruction instance; @@ -39,13 +173,48 @@ void SpvInstruction::set_op_code(SpvOp op_code) { void SpvInstruction::add_operand(SpvId id) { check_defined(); contents->operands.push_back(id); - contents->immediates.push_back(false); + contents->value_types.push_back(SpvOperandId); +} + +void SpvInstruction::add_operands(const SpvInstruction::Operands &operands) { + check_defined(); + SpvInstructionContents::ValueTypes value_types(operands.size(), SpvOperandId); + contents->operands.insert(contents->operands.end(), operands.begin(), operands.end()); + contents->value_types.insert(contents->value_types.end(), value_types.begin(), value_types.end()); } -void SpvInstruction::add_immediate(SpvId id) { +void SpvInstruction::add_immediate(SpvId id, SpvValueType value_type) { check_defined(); contents->operands.push_back(id); - contents->immediates.push_back(true); + contents->value_types.push_back(value_type); +} + +void SpvInstruction::add_immediates(const SpvInstruction::Immediates &literals) { + check_defined(); + for (const SpvInstruction::LiteralValue &v : literals) { + contents->operands.push_back(v.first); // SpvId + contents->value_types.push_back(v.second); // SpvValueType + } +} + +template<> +void SpvInstruction::append(const SpvInstruction::Operands &operands) { + add_operands(operands); +} + +template<> +void SpvInstruction::append(const SpvInstruction::Immediates &immediates) { + add_immediates(immediates); +} + +template<> +void SpvInstruction::append(const std::string &str) { + add_string(str); +} + +template +void SpvInstruction::append(const T &) { + internal_error << "SPIRV: Unhandled type encountered when appending to instruction!\n"; } SpvId SpvInstruction::result_id() const { @@ -63,11 +232,26 @@ SpvOp SpvInstruction::op_code() const { return contents->op_code; } -SpvId SpvInstruction::operand(uint32_t index) { +const void *SpvInstruction::data(uint32_t index) const { + check_defined(); + return &(contents->operands[index]); +} + +SpvId SpvInstruction::operand(uint32_t index) const { check_defined(); 
return contents->operands[index]; } +SpvValueType SpvInstruction::value_type(uint32_t index) const { + check_defined(); + return contents->value_types[index]; +} + +const SpvInstruction::Operands &SpvInstruction::operands() const { + check_defined(); + return contents->operands; +} + bool SpvInstruction::has_type() const { if (!is_defined()) { return false; @@ -88,7 +272,7 @@ bool SpvInstruction::is_defined() const { bool SpvInstruction::is_immediate(uint32_t index) const { check_defined(); - return contents->immediates[index]; + return (contents->value_types[index] != SpvOperandId); } uint32_t SpvInstruction::length() const { @@ -101,24 +285,35 @@ SpvBlock SpvInstruction::block() const { return contents->block; } -void SpvInstruction::add_data(uint32_t bytes, const void *data) { +void SpvInstruction::add_data(uint32_t bytes, const void *data, SpvValueType value_type) { check_defined(); - uint32_t extra_words = (bytes + 3) / 4; + + uint32_t total_entries = (bytes + 3) / 4; + debug(3) << " add_data bytes=" << bytes << " total_entries=" << total_entries << "\n"; + + if (bytes == sizeof(uint32_t)) { + uint32_t entry = 0; + memcpy(&entry, data, sizeof(uint32_t)); + add_immediate(entry, value_type); + return; + } + const size_t entry_size = sizeof(uint32_t); const uint8_t *ptr = (const uint8_t *)data; size_t bytes_copied = 0; - for (uint32_t i = 0; i < extra_words; i++) { - size_t copy_size = std::min(bytes - bytes_copied, (size_t)4); + for (uint32_t i = 0; i < total_entries; i++) { + size_t copy_size = std::min(bytes - bytes_copied, entry_size); SpvId entry = 0; memcpy(&entry, ptr, copy_size); bytes_copied += copy_size; - add_immediate(entry); - ptr++; + add_immediate(entry, value_type); + ptr += entry_size; } } void SpvInstruction::add_string(const std::string &str) { check_defined(); - add_data(str.length() + 1, (const void *)str.c_str()); + debug(3) << " add_string str=" << str << " length=" << (uint32_t)str.length() << "\n"; + add_data(str.length() + 1, (const void *)str.c_str(), SpvStringData); } void SpvInstruction::check_defined() const { @@ -168,7 +363,7 @@ void SpvBlock::add_instruction(SpvInstruction inst) { void SpvBlock::add_variable(SpvInstruction var) { check_defined(); var.set_block(*this); - contents->instructions.push_back(var); + contents->variables.push_back(var); } void SpvBlock::set_function(SpvFunction func) { @@ -202,6 +397,9 @@ bool SpvBlock::is_defined() const { bool SpvBlock::is_terminated() const { check_defined(); + if (contents->instructions.empty()) { + return false; + } switch (contents->instructions.back().op_code()) { case SpvOpBranch: case SpvOpBranchConditional: @@ -259,8 +457,27 @@ bool SpvFunction::is_defined() const { return contents.defined(); } +SpvBlock SpvFunction::create_block(SpvId block_id) { + check_defined(); + if (!contents->blocks.empty()) { + SpvBlock last_block = tail_block(); + if (last_block.is_defined() && !last_block.is_terminated()) { + last_block.add_instruction(SpvFactory::branch(block_id)); + } + } + SpvBlock block = SpvBlock::make(*this, block_id); + contents->blocks.push_back(block); + return block; +} + void SpvFunction::add_block(const SpvBlock &block) { check_defined(); + if (!contents->blocks.empty()) { + SpvBlock last_block = tail_block(); + if (!last_block.is_terminated()) { + last_block.add_instruction(SpvFactory::branch(block.id())); + } + } contents->blocks.push_back(block); } @@ -279,6 +496,11 @@ SpvBlock SpvFunction::entry_block() const { return contents->blocks.front(); } +SpvBlock SpvFunction::tail_block() const { + 
check_defined(); + return contents->blocks.back(); +} + SpvPrecision SpvFunction::return_precision() const { check_defined(); SpvId return_id = contents->declaration.result_id(); @@ -323,6 +545,16 @@ SpvInstruction SpvFunction::declaration() const { return contents->declaration; } +const SpvFunction::Blocks &SpvFunction::blocks() const { + check_defined(); + return contents->blocks; +} + +const SpvFunction::Parameters &SpvFunction::parameters() const { + check_defined(); + return contents->parameters; +} + SpvModule SpvFunction::module() const { check_defined(); return contents->parent; @@ -380,9 +612,14 @@ bool SpvModule::is_defined() const { return contents.defined(); } -void SpvModule::add_debug(const SpvInstruction &val) { +void SpvModule::add_debug_string(SpvId result_id, const std::string &string) { + check_defined(); + contents->debug_source.push_back(SpvFactory::debug_string(result_id, string)); +} + +void SpvModule::add_debug_symbol(SpvId id, const std::string &symbol) { check_defined(); - contents->debug.push_back(val); + contents->debug_symbols.push_back(SpvFactory::debug_symbol(id, symbol)); } void SpvModule::add_annotation(const SpvInstruction &val) { @@ -426,6 +663,16 @@ void SpvModule::add_entry_point(const std::string &name, SpvInstruction inst) { contents->entry_points[name] = std::move(inst); } +void SpvModule::set_binding_count(SpvId val) { + check_defined(); + contents->binding_count = val; +} + +void SpvModule::set_version_format(uint32_t val) { + check_defined(); + contents->version_format = val; +} + void SpvModule::set_source_language(SpvSourceLanguage val) { check_defined(); contents->source_language = val; @@ -441,6 +688,21 @@ void SpvModule::set_memory_model(SpvMemoryModel val) { contents->memory_model = val; } +uint32_t SpvModule::entry_point_count() const { + check_defined(); + return (uint32_t)contents->entry_points.size(); +} + +uint32_t SpvModule::binding_count() const { + check_defined(); + return contents->binding_count; +} + +uint32_t SpvModule::version_format() const { + check_defined(); + return contents->version_format; +} + SpvSourceLanguage SpvModule::source_language() const { check_defined(); return contents->source_language; @@ -451,11 +713,77 @@ SpvAddressingModel SpvModule::addressing_model() const { return contents->addressing_model; } +SpvModule::Imports SpvModule::imports() const { + check_defined(); + SpvModule::Imports results; + results.reserve(contents->imports.size()); + for (const SpvModuleContents::Imports::value_type &v : contents->imports) { + SpvModule::ImportDefinition definition = {v.second, v.first}; + results.push_back(definition); + } + return results; +} + +SpvModule::Extensions SpvModule::extensions() const { + check_defined(); + SpvModule::Extensions results; + results.reserve(contents->extensions.size()); + for (const SpvModuleContents::Extensions::value_type &v : contents->extensions) { + results.push_back(v); + } + return results; +} + +SpvModule::Capabilities SpvModule::capabilities() const { + check_defined(); + SpvModule::Capabilities results; + results.reserve(contents->capabilities.size()); + for (const SpvModuleContents::Capabilities::value_type &v : contents->capabilities) { + results.push_back(v); + } + return results; +} + const SpvModule::Instructions &SpvModule::execution_modes() const { check_defined(); return contents->execution_modes; } +const SpvModule::Instructions &SpvModule::debug_source() const { + check_defined(); + return contents->debug_source; +} + +const SpvModule::Instructions 
&SpvModule::debug_symbols() const { + check_defined(); + return contents->debug_symbols; +} + +const SpvModule::Instructions &SpvModule::annotations() const { + check_defined(); + return contents->annotations; +} + +const SpvModule::Instructions &SpvModule::type_definitions() const { + check_defined(); + return contents->types; +} + +const SpvModule::Instructions &SpvModule::global_constants() const { + check_defined(); + return contents->constants; +} + +const SpvModule::Instructions &SpvModule::global_variables() const { + check_defined(); + return contents->globals; +} + +const SpvModule::Functions &SpvModule::function_definitions() const { + check_defined(); + return contents->functions; +} + SpvMemoryModel SpvModule::memory_model() const { check_defined(); return contents->memory_model; @@ -471,6 +799,13 @@ SpvInstruction SpvModule::entry_point(const std::string &name) const { } } +void SpvModule::import_instruction_set(SpvId id, const std::string &instruction_set) { + check_defined(); + if (contents->imports.find(instruction_set) == contents->imports.end()) { + contents->imports.insert({instruction_set, id}); + } +} + void SpvModule::require_extension(const std::string &extension) { check_defined(); if (contents->extensions.find(extension) == contents->extensions.end()) { @@ -478,6 +813,14 @@ void SpvModule::require_extension(const std::string &extension) { } } +bool SpvModule::is_imported(const std::string &instruction_set) const { + check_defined(); + if (contents->imports.find(instruction_set) != contents->imports.end()) { + return true; + } + return false; +} + bool SpvModule::is_extension_required(const std::string &extension) const { check_defined(); if (contents->extensions.find(extension) != contents->extensions.end()) { @@ -503,13 +846,43 @@ bool SpvModule::is_capability_required(SpvCapability capability) const { SpvModule::EntryPointNames SpvModule::entry_point_names() const { check_defined(); - SpvModule::EntryPointNames entry_point_names(contents->entry_points.size()); + SpvModule::EntryPointNames entry_point_names; + entry_point_names.reserve(contents->entry_points.size()); for (const SpvModuleContents::EntryPoints::value_type &v : contents->entry_points) { entry_point_names.push_back(v.first); } return entry_point_names; } +SpvModule::Instructions SpvModule::entry_points() const { + check_defined(); + SpvModule::Instructions entry_points; + entry_points.reserve(contents->entry_points.size()); + for (const SpvModuleContents::EntryPoints::value_type &v : contents->entry_points) { + entry_points.push_back(v.second); + } + return entry_points; +} + +SpvModule::ImportNames SpvModule::import_names() const { + check_defined(); + SpvModule::ImportNames results; + results.reserve(contents->imports.size()); + for (const SpvModuleContents::Imports::value_type &v : contents->imports) { + results.push_back(v.first); + } + return results; +} + +SpvId SpvModule::lookup_import(const std::string &instruction_set) const { + SpvId result_id = SpvInvalidId; + SpvModuleContents::Imports::const_iterator it = contents->imports.find(instruction_set); + if (it != contents->imports.end()) { + result_id = it->second; + } + return result_id; +} + SpvId SpvModule::id() const { check_defined(); return contents->module_id; @@ -524,10 +897,10 @@ void SpvModule::encode(SpvBinary &binary) const { // 0. 
Encode the header binary.push_back(SpvMagicNumber); - binary.push_back(SpvVersion); + binary.push_back(contents->version_format); binary.push_back(contents->source_language); - binary.push_back(0); // Bound placeholder (aka last id used) - binary.push_back(0); // Reserved for schema. + binary.push_back(contents->binding_count); // last id bound to this module (aka last id used) + binary.push_back(0); // Reserved for schema. // 1. Capabilities for (const SpvCapability &capability : contents->capabilities) { @@ -542,8 +915,10 @@ void SpvModule::encode(SpvBinary &binary) const { } // 3. Extended Instruction Set Imports - for (const std::string &import : contents->imports) { - SpvInstruction inst = SpvFactory::import(import); + for (const SpvModuleContents::Imports::value_type &import : contents->imports) { + const std::string &import_name = import.first; + SpvId import_id = import.second; + SpvInstruction inst = SpvFactory::import(import_id, import_name); inst.encode(binary); } @@ -562,8 +937,11 @@ void SpvModule::encode(SpvBinary &binary) const { inst.encode(binary); } - // 7. Debug - for (const SpvInstruction &inst : contents->debug) { + // 7. Debug Source & Names + for (const SpvInstruction &inst : contents->debug_source) { + inst.encode(binary); + } + for (const SpvInstruction &inst : contents->debug_symbols) { inst.encode(binary); } @@ -596,27 +974,168 @@ void SpvModule::encode(SpvBinary &binary) const { // -- SpvBuilder::SpvBuilder() { - SpvId module_id = declare_id(SpvModuleId); + reset(); +} + +void SpvBuilder::reset() { + active_id = SpvInvalidId; + active_function = SpvFunction(); + active_block = SpvBlock(); + + kind_map.clear(); + type_map.clear(); + struct_map.clear(); + scope_map.clear(); + string_map.clear(); + constant_map.clear(); + function_map.clear(); + id_symbol_map.clear(); + symbol_id_map.clear(); + base_type_map.clear(); + storage_class_map.clear(); + pointer_type_map.clear(); + variable_type_map.clear(); + function_type_map.clear(); + + SpvId module_id = make_id(SpvModuleId); module = SpvModule::make(module_id); } SpvId SpvBuilder::reserve_id(SpvKind kind) { - return declare_id(kind); + return make_id(kind); } -SpvId SpvBuilder::declare_id(SpvKind kind) { +SpvId SpvBuilder::make_id(SpvKind kind) { // use type-agnostic non-overlapping increasing ids SpvId item_id = kind_map.size() + 1; + debug(3) << " make_id: %" << item_id << " kind=" << kind_name(kind) << "\n"; kind_map[item_id] = kind; return item_id; } -SpvKind SpvBuilder::kind_of(SpvId item_id) { +std::string SpvBuilder::kind_name(SpvKind kind) const { + switch (kind) { + case SpvInvalidItem: { + return "InvalidItem"; + } + case SpvTypeId: { + return "TypeId"; + } + case SpvVoidTypeId: { + return "VoidTypeId"; + } + case SpvBoolTypeId: { + return "BoolTypeId"; + } + case SpvIntTypeId: { + return "IntTypeId"; + } + case SpvFloatTypeId: { + return "FloatTypeId"; + } + case SpvVectorTypeId: { + return "VectorTypeId"; + } + case SpvArrayTypeId: { + return "ArrayTypeId"; + } + case SpvRuntimeArrayTypeId: { + return "RuntimeArrayTypeId"; + } + case SpvStringTypeId: { + return "StringTypeId"; + } + case SpvPointerTypeId: { + return "PointerTypeId"; + } + case SpvStructTypeId: { + return "StructTypeId"; + } + case SpvFunctionTypeId: { + return "FunctionTypeId"; + } + case SpvAccessChainId: { + return "AccessChainId"; + } + case SpvConstantId: { + return "ConstantId"; + } + case SpvBoolConstantId: { + return "BoolConstantId"; + } + case SpvIntConstantId: { + return "IntConstantId"; + } + case SpvFloatConstantId: { + return 
"FloatConstantId"; + } + case SpvStringConstantId: { + return "StringConstantId"; + } + case SpvCompositeConstantId: { + return "CompositeConstantId"; + } + case SpvResultId: { + return "ResultId"; + } + case SpvVariableId: { + return "VariableId"; + } + case SpvInstructionId: { + return "InstructionId"; + } + case SpvFunctionId: { + return "FunctionId"; + } + case SpvBlockId: { + return "BlockId"; + } + case SpvLabelId: { + return "LabelId"; + } + case SpvParameterId: { + return "ParameterId"; + } + case SpvModuleId: { + return "ModuleId"; + } + case SpvUnknownItem: { + return "UnknownItem"; + } + default: { + return "InvalidItem"; + } + }; + return "InvalidItem"; +} + +SpvKind SpvBuilder::kind_of(SpvId item_id) const { KindMap::const_iterator it = kind_map.find(item_id); if (it != kind_map.end()) { - return SpvInvalidItem; + return it->second; + } + return SpvInvalidItem; +} + +SpvId SpvBuilder::type_of(SpvId variable_id) const { + VariableTypeMap::const_iterator it = variable_type_map.find(variable_id); + if (it != variable_type_map.end()) { + return it->second; + } + return SpvInvalidId; +} + +void SpvBuilder::finalize() { + SpvId last_id = (SpvId)(kind_map.size() + 1); + module.set_binding_count(last_id); + + if (module.is_capability_required(SpvCapabilityInt8)) { + module.require_extension("SPV_KHR_8bit_storage"); + } + + if (module.is_capability_required(SpvCapabilityInt16)) { + module.require_extension("SPV_KHR_16bit_storage"); } - return it->second; } void SpvBuilder::encode(SpvBinary &binary) const { @@ -624,125 +1143,231 @@ void SpvBuilder::encode(SpvBinary &binary) const { module.encode(binary); } -SpvId SpvBuilder::map_type(const Type &type, uint32_t array_size) { +SpvId SpvBuilder::declare_type(const Type &type, uint32_t array_size) { SpvId type_id = lookup_type(type, array_size); if (type_id == SpvInvalidId) { - type_id = declare_type(type, array_size); + type_id = add_type(type, array_size); } return type_id; } -SpvId SpvBuilder::map_pointer_type(const Type &type, SpvStorageClass storage_class) { +SpvId SpvBuilder::declare_pointer_type(const Type &type, SpvStorageClass storage_class) { SpvId ptr_type_id = lookup_pointer_type(type, storage_class); if (ptr_type_id == SpvInvalidId) { - ptr_type_id = declare_pointer_type(ptr_type_id, storage_class); + ptr_type_id = add_pointer_type(type, storage_class); } return ptr_type_id; } -SpvId SpvBuilder::map_pointer_type(SpvId type_id, SpvStorageClass storage_class) { +SpvId SpvBuilder::declare_pointer_type(SpvId type_id, SpvStorageClass storage_class) { SpvId ptr_type_id = lookup_pointer_type(type_id, storage_class); if (ptr_type_id == SpvInvalidId) { - ptr_type_id = declare_pointer_type(type_id, storage_class); + ptr_type_id = add_pointer_type(type_id, storage_class); } return ptr_type_id; } -SpvId SpvBuilder::map_function_type(SpvId return_type, const ParamTypes ¶m_types) { +SpvId SpvBuilder::declare_function_type(SpvId return_type, const ParamTypes ¶m_types) { SpvId type_id = lookup_function_type(return_type, param_types); if (type_id == SpvInvalidId) { - type_id = declare_function_type(return_type, param_types); + type_id = add_function_type(return_type, param_types); } return type_id; } -SpvId SpvBuilder::map_constant(const Type &type, const void *data) { - SpvId result_id = lookup_constant(type, data); +SpvId SpvBuilder::declare_function(const std::string &name, SpvId function_type) { + SpvId existing_id = lookup_id(name); + if (existing_id != SpvInvalidId) { + if (kind_of(existing_id) == SpvFunctionId) { + SpvFunction 
existing_func = lookup_function(existing_id); + if (existing_func.type_id() == function_type) { + return existing_id; + } + } + } + return add_function(name, function_type); +} + +SpvId SpvBuilder::declare_constant(const Type &type, const void *data, bool is_specialization) { + SpvId result_id = lookup_constant(type, data, is_specialization); if (result_id == SpvInvalidId) { - result_id = declare_constant(type, data); + result_id = add_constant(type, data, is_specialization); } return result_id; } -void SpvBuilder::add_entry_point(const std::string &name, - SpvId func_id, SpvExecutionModel exec_model, - const Variables &variables) { - - SpvInstruction inst = SpvFactory::entry_point(exec_model, func_id, name, variables); - module.add_entry_point(name, inst); +SpvId SpvBuilder::declare_symbol(const std::string &symbol, SpvId id, SpvId scope_id) { + SpvId existing_id = lookup_id(symbol); + if (existing_id != SpvInvalidId) { + SpvId existing_scope = lookup_scope(existing_id); + if (existing_scope == scope_id) { + return existing_id; + } + } + add_symbol(symbol, id, scope_id); + return id; } -SpvFunction SpvBuilder::add_function(SpvId return_type_id, const ParamTypes ¶m_types) { - SpvId func_id = declare_id(SpvFunctionId); - SpvId func_type_id = map_function_type(return_type_id, param_types); - SpvFunction func = SpvFunction::make(func_type_id, func_id, return_type_id); - for (SpvId param_type_id : param_types) { - SpvId param_id = declare_id(SpvParameterId); - SpvInstruction param_inst = SpvFactory::function_parameter(param_type_id, param_id); - func.add_parameter(param_inst); - map_instruction(param_inst); +SpvStorageClass SpvBuilder::lookup_storage_class(SpvId id) const { + SpvStorageClass result = SpvInvalidStorageClass; + StorageClassMap::const_iterator it = storage_class_map.find(id); + if (it != storage_class_map.end()) { + result = it->second; } - SpvId block_id = declare_id(SpvBlockId); - SpvBlock entry_block = SpvBlock::make(func, block_id); - func.add_block(entry_block); - module.add_function(func); - function_map[func_id] = func; - map_instruction(func.declaration()); - return func; + return result; +} + +SpvId SpvBuilder::lookup_variable(const std::string &name, SpvId type_id, SpvStorageClass storage_class, SpvId scope_id) const { + SpvId existing_id = lookup_id(name); + if (existing_id != SpvInvalidId) { + if ((kind_of(existing_id) == SpvVariableId) && + (type_of(existing_id) == type_id) && + (lookup_storage_class(existing_id) == storage_class) && + (lookup_scope(existing_id) == scope_id)) { + return existing_id; + } + } + return SpvInvalidId; +} + +bool SpvBuilder::has_variable(const std::string &name, SpvId type_id, SpvStorageClass storage_class, SpvId scope_id) const { + return (lookup_variable(name, type_id, storage_class, scope_id) != SpvInvalidId); } -SpvId SpvBuilder::add_global_variable(SpvId type_id, uint32_t storage_class, SpvId init_id) { +SpvId SpvBuilder::declare_variable(const std::string &name, SpvId type_id, SpvStorageClass storage_class, SpvId init_id) { + SpvId block_id = current_function().entry_block().id(); + SpvId existing_id = lookup_variable(name, type_id, storage_class, block_id); + if (existing_id != SpvInvalidId) { + return existing_id; + } SpvId var_id = reserve_id(SpvVariableId); - module.add_global(SpvFactory::variable(var_id, type_id, storage_class, init_id)); + debug(3) << " declare_variable: %" << var_id << "\n" + << " name='" << name << "'\n" + << " type_id=" << type_id << "\n" + << " storage_class=" << (uint32_t)storage_class << "\n" + << " 
init_id=" << init_id << "\n"; + current_function().entry_block().add_variable(SpvFactory::variable(var_id, type_id, storage_class, init_id)); + declare_symbol(name, var_id, block_id); + storage_class_map[var_id] = storage_class; + variable_type_map[var_id] = type_id; return var_id; } -SpvId SpvBuilder::add_variable(SpvId type_id, uint32_t storage_class, SpvId init_id) { +SpvId SpvBuilder::declare_global_variable(const std::string &name, SpvId type_id, SpvStorageClass storage_class, SpvId init_id) { SpvId var_id = reserve_id(SpvVariableId); - current_block().add_variable(SpvFactory::variable(var_id, type_id, storage_class, init_id)); + debug(3) << " declare_global_variable: %" << var_id << "\n" + << " name='" << name << "'\n" + << " type_id=" << type_id << "\n" + << " storage_class=" << (uint32_t)storage_class << "\n" + << " init_id=" << init_id << "\n"; + module.add_global(SpvFactory::variable(var_id, type_id, storage_class, init_id)); + declare_symbol(name, var_id, module.id()); + storage_class_map[var_id] = storage_class; + variable_type_map[var_id] = type_id; return var_id; } +void SpvBuilder::add_entry_point(SpvId func_id, SpvExecutionModel exec_model, + const Variables &variables) { + + const std::string &func_name = lookup_symbol(func_id); + if (func_name.empty()) { + internal_error << "SPIRV: Function missing name definition: " << func_id << "\n"; + } else { + debug(3) << " add_entry_point: %" << func_id << "\n" + << " func_name='" << func_name << "'\n" + << " exec_model=" << (uint32_t)exec_model << "\n" + << " variable_count=" << (uint32_t)variables.size() << "\n"; + SpvInstruction inst = SpvFactory::entry_point(exec_model, func_id, func_name, variables); + module.add_entry_point(func_name, inst); + } +} + +SpvId SpvBuilder::add_function(const std::string &name, SpvId return_type_id, const ParamTypes ¶m_types) { + SpvId func_id = make_id(SpvFunctionId); + SpvId func_type_id = declare_function_type(return_type_id, param_types); + debug(3) << " add_function: %" << func_id << "\n" + << " func_type_id=" << func_type_id << "\n" + << " return_type_id=" << return_type_id << "\n" + << " parameter_count=" << (uint32_t)param_types.size() << "\n"; + SpvFunction func = SpvFunction::make(func_type_id, func_id, return_type_id); + for (SpvId param_type_id : param_types) { + SpvId param_id = make_id(SpvParameterId); + SpvInstruction param_inst = SpvFactory::function_parameter(param_type_id, param_id); + func.add_parameter(param_inst); + } + SpvId block_id = make_id(SpvBlockId); + SpvBlock entry_block = SpvBlock::make(func, block_id); + func.add_block(entry_block); + module.add_function(func); + function_map[func_id] = func; + declare_symbol(name, func_id, module.id()); + return func_id; +} + void SpvBuilder::add_annotation(SpvId target_id, SpvDecoration decoration_type, const Literals &literals) { SpvInstruction inst = SpvFactory::decorate(target_id, decoration_type, literals); + debug(3) << " add_annotation: %" << target_id << "\n" + << " decoration_type=" << uint32_t(decoration_type) << "\n" + << " literals=["; + for (uint32_t v : literals) { + debug(3) << " " << v; + } + debug(3) << " ]\n"; current_module().add_annotation(inst); } void SpvBuilder::add_struct_annotation(SpvId struct_type_id, uint32_t member_index, SpvDecoration decoration_type, const Literals &literals) { SpvInstruction inst = SpvFactory::decorate_member(struct_type_id, member_index, decoration_type, literals); + debug(3) << " add_struct_annotation: %" << struct_type_id << "\n" + << " member_index=" << member_index << "\n" + << 
" decoration_type=" << uint32_t(decoration_type) << "\n" + << " literals=["; + for (uint32_t v : literals) { + debug(3) << " " << v; + } + debug(3) << " ]\n"; current_module().add_annotation(inst); } void SpvBuilder::add_execution_mode_local_size(SpvId func_id, - uint32_t wg_size_x, uint32_t wg_size_y, uint32_t wg_size_z) { + uint32_t local_size_x, + uint32_t local_size_y, + uint32_t local_size_z) { - wg_size_x = std::max(wg_size_x, (uint32_t)1); - wg_size_y = std::max(wg_size_y, (uint32_t)1); - wg_size_z = std::max(wg_size_z, (uint32_t)1); + local_size_x = std::max(local_size_x, (uint32_t)1); + local_size_y = std::max(local_size_y, (uint32_t)1); + local_size_z = std::max(local_size_z, (uint32_t)1); - SpvInstruction exec_mode_inst = SpvFactory::exec_mode_local_size(func_id, wg_size_x, wg_size_y, wg_size_z); + SpvInstruction exec_mode_inst = SpvFactory::exec_mode_local_size(func_id, local_size_x, local_size_y, local_size_z); + module.add_execution_mode(exec_mode_inst); +} + +void SpvBuilder::add_execution_mode_local_size_id(SpvId func_id, + SpvId local_size_x_id, + SpvId local_size_y_id, + SpvId local_size_z_id) { + + SpvInstruction exec_mode_inst = SpvFactory::exec_mode_local_size(func_id, local_size_x_id, local_size_y_id, local_size_z_id); module.add_execution_mode(exec_mode_inst); } void SpvBuilder::enter_block(const SpvBlock &block) { - block_stack.push(block); + active_block = block; } SpvBlock SpvBuilder::current_block() const { - SpvBlock block; - if (!block_stack.empty()) { - block = block_stack.top(); - } - return block; + return active_block; +} + +SpvBlock SpvBuilder::create_block(SpvId block_id) { + return current_function().create_block(block_id); } SpvBlock SpvBuilder::leave_block() { - SpvBlock block; - if (!block_stack.empty()) { - block = block_stack.top(); - block_stack.pop(); - } - return block; + SpvBlock prev_block = active_block; + active_block = SpvBlock(); + return prev_block; } SpvFunction SpvBuilder::lookup_function(SpvId func_id) const { @@ -754,47 +1379,125 @@ SpvFunction SpvBuilder::lookup_function(SpvId func_id) const { return func; } -void SpvBuilder::enter_function(const SpvFunction &func) { - function_stack.push(func); - enter_block(func.entry_block()); +std::string SpvBuilder::lookup_symbol(SpvId id) const { + std::string name; + IdSymbolMap::const_iterator it = id_symbol_map.find(id); + if (it != id_symbol_map.end()) { + name = it->second; + } + return name; } -SpvFunction SpvBuilder::current_function() const { - SpvFunction func; - if (!function_stack.empty()) { - func = function_stack.top(); +SpvId SpvBuilder::lookup_id(const std::string &symbol) const { + SpvId result = SpvInvalidId; + SymbolIdMap::const_iterator it = symbol_id_map.find(symbol); + if (it != symbol_id_map.end()) { + result = it->second; } - return func; + return result; } -SpvFunction SpvBuilder::leave_function() { - SpvFunction func; - leave_block(); - if (!function_stack.empty()) { - func = function_stack.top(); - function_stack.pop(); +void SpvBuilder::add_symbol(const std::string &symbol, SpvId id, SpvId scope_id) { + symbol_id_map[symbol] = id; + id_symbol_map[id] = symbol; + scope_map[id] = scope_id; + debug(3) << " add_symbol: %" << id << "\n" + << " symbol='" << symbol << "'\n" + << " scope_id=" << scope_id << "\n"; + module.add_debug_symbol(id, symbol); +} + +SpvId SpvBuilder::lookup_scope(SpvId id) const { + SpvId result = SpvInvalidId; + ScopeMap::const_iterator it = scope_map.find(id); + if (it != scope_map.end()) { + result = it->second; } - return func; + return result; +} 
+ +SpvId SpvBuilder::lookup_import(const std::string &instruction_set) const { + return module.lookup_import(instruction_set); +} + +void SpvBuilder::enter_function(const SpvFunction &func) { + active_function = func; + enter_block(active_function.entry_block()); } -void SpvBuilder::set_current_id(SpvId val) { - scope_id = val; +SpvFunction SpvBuilder::current_function() const { + return active_function; +} + +SpvFunction SpvBuilder::leave_function() { + SpvFunction prev_func = active_function; + active_function = SpvFunction(); + return prev_func; } SpvId SpvBuilder::current_id() const { - return scope_id; + return active_id; +} + +void SpvBuilder::update_id(SpvId id) { + active_id = id; } SpvModule SpvBuilder::current_module() const { return module; } +void SpvBuilder::set_version_format(uint32_t val) { + module.set_version_format(val); +} + +void SpvBuilder::set_source_language(SpvSourceLanguage val) { + module.set_source_language(val); +} + +void SpvBuilder::set_addressing_model(SpvAddressingModel val) { + module.set_addressing_model(val); +} + +void SpvBuilder::set_memory_model(SpvMemoryModel val) { + module.set_memory_model(val); +} + +SpvSourceLanguage SpvBuilder::source_language() const { + return module.source_language(); +} + +SpvAddressingModel SpvBuilder::addressing_model() const { + return module.addressing_model(); +} + +SpvMemoryModel SpvBuilder::memory_model() const { + return module.memory_model(); +} + +SpvId SpvBuilder::import_glsl_intrinsics() { + return import_instruction_set("GLSL.std.450"); +} + +SpvId SpvBuilder::import_instruction_set(const std::string &instruction_set) { + SpvId result_id = module.lookup_import(instruction_set); + if (result_id == SpvInvalidId) { + result_id = make_id(SpvImportId); + module.import_instruction_set(result_id, instruction_set); + } + return result_id; +} + void SpvBuilder::require_capability(SpvCapability capability) { if (!module.is_capability_required(capability)) { module.require_capability(capability); } } +bool SpvBuilder::is_imported(const std::string &instruction_set) const { + return module.is_imported(instruction_set); +} + bool SpvBuilder::is_capability_required(SpvCapability capability) const { return module.is_capability_required(capability); } @@ -810,15 +1513,11 @@ bool SpvBuilder::is_extension_required(const std::string &extension) const { } SpvBuilder::TypeKey SpvBuilder::make_type_key(const Type &type, uint32_t array_size) const { - TypeKey key(4 + sizeof(uint32_t), ' '); - key[0] = type.code(); - key[1] = type.bits(); - key[2] = type.lanes() & 0xff; - key[3] = (type.lanes() >> 8) & 0xff; - for (size_t i = 0; i < sizeof(uint32_t); i++) { - key[i + 4] = (array_size & 0xff); - array_size >>= 8; - } + TypeKey key = hash_splitmix64(type.code()); + key = hash_combine(key, type.bits()); + key = hash_combine(key, type.lanes()); + key = hash_combine(key, type.bytes()); + key = hash_combine(key, array_size); return key; } @@ -831,7 +1530,7 @@ SpvId SpvBuilder::lookup_type(const Type &type, uint32_t array_size) const { return it->second; } -SpvId SpvBuilder::declare_type(const Type &type, uint32_t array_size) { +SpvId SpvBuilder::add_type(const Type &type, uint32_t array_size) { SpvBuilder::TypeKey type_key = make_type_key(type, array_size); TypeMap::const_iterator it = type_map.find(type_key); if (it != type_map.end()) { @@ -839,9 +1538,22 @@ SpvId SpvBuilder::declare_type(const Type &type, uint32_t array_size) { } if (array_size > 1) { - SpvId array_type_id = declare_id(SpvArrayTypeId); - SpvId element_type_id = 
declare_type(type, 1);
-        SpvInstruction inst = SpvFactory::array_type(array_type_id, element_type_id, array_size);
+        // first declare the array size as a uint32 constant value
+        Type array_size_type = UInt(32);
+        ConstantKey constant_key = make_constant_key(array_size_type, &array_size);
+        SpvId array_size_id = make_id(SpvIntConstantId);
+        SpvId array_size_type_id = add_type(array_size_type);
+        SpvInstruction array_size_inst = SpvFactory::constant(array_size_id, array_size_type_id, array_size_type.bytes(), &array_size, SpvIntegerData);
+        module.add_type(array_size_inst);  // needs to be defined in the type section (prior to its use in the array_type inst)
+        constant_map[constant_key] = array_size_id;
+
+        // declare the array type
+        SpvId array_type_id = make_id(SpvArrayTypeId);
+        SpvId element_type_id = add_type(type, 1);
+        debug(3) << " add_array_type: %" << array_type_id << "\n"
+                 << " element_type_id=" << element_type_id << "\n"
+                 << " array_size=" << array_size << "\n";
+        SpvInstruction inst = SpvFactory::array_type(array_type_id, element_type_id, array_size_id);
         module.add_type(inst);
         type_map[type_key] = array_type_id;
         return array_type_id;
@@ -849,28 +1561,55 @@ SpvId SpvBuilder::declare_type(const Type &type, uint32_t array_size) {
     SpvId type_id = SpvInvalidId;
     if (type.is_vector()) {
-        type_id = declare_id(SpvVectorTypeId);
-        SpvId element_type_id = declare_type(type.with_lanes(1));
+        type_id = make_id(SpvVectorTypeId);
+        SpvId element_type_id = add_type(type.with_lanes(1));
+        debug(3) << " add_vector_type: %" << type_id << "\n"
+                 << " element_type_id=" << element_type_id << "\n"
+                 << " lanes=" << type.lanes() << "\n";
         SpvInstruction inst = SpvFactory::vector_type(type_id, element_type_id, type.lanes());
         module.add_type(inst);
     } else {
         if (type.is_handle()) {
-            type_id = declare_id(SpvVoidTypeId);
+            type_id = make_id(SpvVoidTypeId);
             SpvInstruction inst = SpvFactory::void_type(type_id);
+            debug(3) << " add_void_type: %" << type_id << "\n";
             module.add_type(inst);
         } else if (type.is_bool()) {
-            type_id = declare_id(SpvBoolTypeId);
+            type_id = make_id(SpvBoolTypeId);
+            debug(3) << " add_bool_type: %" << type_id << "\n";
             SpvInstruction inst = SpvFactory::bool_type(type_id);
             module.add_type(inst);
         } else if (type.is_float()) {
-            type_id = declare_id(SpvFloatTypeId);
+            type_id = make_id(SpvFloatTypeId);
+            debug(3) << " add_float_type: %" << type_id << "\n"
+                     << " bits=" << type.bits() << "\n";
             SpvInstruction inst = SpvFactory::float_type(type_id, type.bits());
             module.add_type(inst);
+            if (type.bits() == 16) {
+                module.require_capability(SpvCapabilityFloat16);
+            } else if (type.bits() == 64) {
+                module.require_capability(SpvCapabilityFloat64);
+            }
         } else if (type.is_int_or_uint()) {
-            type_id = declare_id(SpvIntTypeId);
-            SpvId signedness = type.is_uint() ? 0 : 1;
+            SpvId signedness = 0;
+            bool signedness_support = !module.is_capability_required(SpvCapabilityKernel);  // kernel execution doesn't track signedness
+            if (signedness_support) {
+                signedness = type.is_uint() ? 0 : 1;
+            }
+
+            type_id = make_id(signedness ? SpvIntTypeId : SpvUIntTypeId);
+            debug(3) << " add_integer_type: %" << type_id << "\n"
+                     << " bits=" << type.bits() << "\n"
+                     << " signed=" << (signedness ? 
"true" : "false") << "\n"; SpvInstruction inst = SpvFactory::integer_type(type_id, type.bits(), signedness); module.add_type(inst); + if (type.bits() == 8) { + module.require_capability(SpvCapabilityInt8); + } else if (type.bits() == 16) { + module.require_capability(SpvCapabilityInt16); + } else if (type.bits() == 64) { + module.require_capability(SpvCapabilityInt64); + } } else { internal_error << "SPIRV: Unsupported type " << type << "\n"; } @@ -880,41 +1619,61 @@ SpvId SpvBuilder::declare_type(const Type &type, uint32_t array_size) { return type_id; } +SpvId SpvBuilder::declare_void_type() { + return declare_type(Handle()); +} + SpvBuilder::TypeKey SpvBuilder::make_struct_type_key(const StructMemberTypes &member_type_ids) const { - TypeKey key(member_type_ids.size() * sizeof(SpvId), ' '); - uint32_t index = 0; + TypeKey key = hash_splitmix64(member_type_ids.size()); for (SpvId type_id : member_type_ids) { - for (size_t i = 0; i < sizeof(uint32_t); i++, index++) { - key[index] = (type_id & 0xff); - type_id >>= 8; - } + key = hash_combine(key, type_id); } return key; } -SpvId SpvBuilder::lookup_struct(const StructMemberTypes &member_type_ids) const { +SpvId SpvBuilder::lookup_struct(const std::string &struct_name, const StructMemberTypes &member_type_ids) const { TypeKey key = make_struct_type_key(member_type_ids); TypeMap::const_iterator it = struct_map.find(key); if (it != struct_map.end()) { - return it->second; + if (struct_name == lookup_symbol(it->second)) { + return it->second; + } } return SpvInvalidId; } -SpvId SpvBuilder::declare_struct(const StructMemberTypes &member_type_ids) { +SpvId SpvBuilder::add_struct(const std::string &struct_name, const StructMemberTypes &member_type_ids) { TypeKey key = make_struct_type_key(member_type_ids); TypeMap::const_iterator it = struct_map.find(key); if (it != struct_map.end()) { - return it->second; + if (struct_name == lookup_symbol(it->second)) { + return it->second; + } } - SpvId struct_type_id = declare_id(SpvStructTypeId); + SpvId struct_type_id = make_id(SpvStructTypeId); + debug(3) << " add_struct_type: %" << struct_type_id << "\n" + << " name=" << struct_name << "\n" + << " member_type_ids=["; + for (SpvId m : member_type_ids) { + debug(3) << " " << m; + } + debug(3) << " ]\n"; SpvInstruction inst = SpvFactory::struct_type(struct_type_id, member_type_ids); module.add_type(inst); struct_map[key] = struct_type_id; + add_symbol(struct_name, struct_type_id, module.id()); return struct_type_id; } +SpvId SpvBuilder::declare_struct(const std::string &struct_name, const StructMemberTypes &member_types) { + SpvId struct_id = lookup_struct(struct_name, member_types); + if (struct_id == SpvInvalidId) { + struct_id = add_struct(struct_name, member_types); + } + return struct_id; +} + SpvBuilder::PointerTypeKey SpvBuilder::make_pointer_type_key(const Type &type, SpvStorageClass storage_class) const { SpvId base_type_id = lookup_type(type); if (base_type_id == SpvInvalidId) { @@ -944,54 +1703,72 @@ SpvId SpvBuilder::lookup_pointer_type(SpvId base_type_id, SpvStorageClass storag return SpvInvalidId; } -SpvId SpvBuilder::declare_pointer_type(const Type &type, SpvStorageClass storage_class) { - SpvId base_type_id = map_type(type); - return declare_pointer_type(base_type_id, storage_class); +SpvId SpvBuilder::add_pointer_type(const Type &type, SpvStorageClass storage_class) { + SpvId base_type_id = declare_type(type); + debug(3) << " add_pointer_type: " << type << "\n" + << " base_type_id=" << base_type_id << "\n" + << " storage_class=" << 
(uint32_t)(storage_class) << "\n"; + if (base_type_id == SpvInvalidId) { + internal_error << "SPIRV: Attempted to create pointer type for undeclared base type! " << type << "\n"; + } + return add_pointer_type(base_type_id, storage_class); } -SpvId SpvBuilder::declare_pointer_type(SpvId base_type_id, SpvStorageClass storage_class) { +SpvId SpvBuilder::add_pointer_type(SpvId base_type_id, SpvStorageClass storage_class) { + if (base_type_id == SpvInvalidId) { + internal_error << "SPIRV: Attempted to create pointer type for undeclared base type!\n"; + } + PointerTypeKey key = make_pointer_type_key(base_type_id, storage_class); PointerTypeMap::const_iterator it = pointer_type_map.find(key); if (it != pointer_type_map.end()) { return it->second; } - SpvId pointer_type_id = declare_id(SpvPointerTypeId); + SpvId pointer_type_id = make_id(SpvPointerTypeId); + debug(3) << " add_pointer_type: %" << pointer_type_id << "\n" + << " base_type_id=" << base_type_id << "\n" + << " storage_class=" << (uint32_t)(storage_class) << "\n"; SpvInstruction inst = SpvFactory::pointer_type(pointer_type_id, storage_class, base_type_id); module.add_type(inst); pointer_type_map[key] = pointer_type_id; + storage_class_map[pointer_type_id] = storage_class; + base_type_map[pointer_type_id] = base_type_id; return pointer_type_id; } -SpvBuilder::ConstantKey SpvBuilder::make_constant_key(const Type &type, const void *data) const { - ConstantKey key(type.bytes() + 4, ' '); - key[0] = type.code(); - key[1] = type.bits(); - key[2] = type.lanes() & 0xff; - key[3] = (type.lanes() >> 8) & 0xff; - const char *data_char = (const char *)data; - for (int i = 0; i < type.bytes(); i++) { - key[i + 4] = data_char[i]; +SpvBuilder::ConstantKey SpvBuilder::make_constant_key(uint8_t code, uint8_t bits, int lanes, size_t bytes, const void *data, bool is_specialization) const { + ConstantKey key = hash_splitmix64(code); + key = hash_combine(key, bits); + key = hash_combine(key, lanes); + key = hash_combine(key, bytes); + key = hash_combine(key, is_specialization ? 
uint64_t(-1) : uint64_t(1));
+
+    if (data != nullptr) {
+        const int8_t *ptr = reinterpret_bits<const int8_t *>(data);
+        for (size_t i = 0; i < bytes; ++i) {
+            key = hash_combine(key, uint64_t(ptr[i]));
+        }
+    }
     return key;
 }
 
+SpvBuilder::ConstantKey SpvBuilder::make_constant_key(const Type &type, const void *data, bool is_specialization) const {
+    return make_constant_key(type.code(), type.bits(), type.lanes(), type.bytes(), data, is_specialization);
+}
+
 SpvBuilder::ConstantKey SpvBuilder::make_bool_constant_key(bool value) const {
     Type type = Bool();
     bool data = value;
     return make_constant_key(type, &data);
 }
 
+SpvBuilder::ConstantKey SpvBuilder::make_string_constant_key(const std::string &value) const {
+    return make_constant_key(halide_type_handle, 8, 1, value.length(), (const char *)(value.c_str()));
+}
+
 SpvBuilder::ConstantKey SpvBuilder::make_null_constant_key(const Type &type) const {
-    ConstantKey key(type.bytes() + 4, ' ');
-    key[0] = type.code();
-    key[1] = type.bits();
-    key[2] = type.lanes() & 0xff;
-    key[3] = (type.lanes() >> 8) & 0xff;
-    for (int i = 0; i < type.bytes(); i++) {
-        key[i + 4] = 0;
-    }
-    return key;
+    return make_constant_key(type.code(), type.bits(), type.lanes(), type.bytes(), nullptr);
 }
 
 SpvId SpvBuilder::lookup_null_constant(const Type &type) const {
@@ -1010,8 +1787,10 @@ SpvId SpvBuilder::declare_null_constant(const Type &type) {
         return it->second;
     }
 
-    SpvId result_id = declare_id(SpvConstantId);
-    SpvId type_id = declare_type(type);
+    SpvId result_id = make_id(SpvConstantId);
+    SpvId type_id = add_type(type);
+
+    debug(3) << " declare_null_constant: %" << result_id << " " << type << "\n";
     SpvInstruction inst = SpvFactory::null_constant(result_id, type_id);
     module.add_constant(inst);
     constant_map[key] = result_id;
     return result_id;
 }
 
 SpvId SpvBuilder::declare_bool_constant(bool value) {
-    const std::string key = make_bool_constant_key(value);
+    ConstantKey key = make_bool_constant_key(value);
     ConstantMap::const_iterator it = constant_map.find(key);
     if (it != constant_map.end()) {
         return it->second;
     }
 
-    debug(3) << "declare_bool_constant for " << value << "\n";
-
     Type type = Bool();
-    SpvId result_id = declare_id(SpvBoolConstantId);
-    SpvId type_id = declare_type(type);
+    SpvId result_id = make_id(SpvBoolConstantId);
+    SpvId type_id = add_type(type);
+
+    debug(3) << " declare_bool_constant: %" << result_id << " bool " << value << "\n";
     SpvInstruction inst = SpvFactory::bool_constant(result_id, type_id, value);
     module.add_constant(inst);
     constant_map[key] = result_id;
     return result_id;
 }
 
-SpvId SpvBuilder::declare_scalar_constant(const Type &scalar_type, const void *data) {
-    if (scalar_type.lanes() != 1) {
-        internal_error << "SPIRV: Invalid type provided for scalar constant!" << scalar_type << "\n";
-        return SpvInvalidId;
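+// Note: SPIR-V has no string-typed OpConstant; declare_string_constant() below
+// emits an OpString (a debug-section instruction), so these ids are only usable
+// where string operands are expected (e.g. debug symbols and source info).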
<< scalar_type << "\n"; - return SpvInvalidId; +SpvId SpvBuilder::declare_string_constant(const std::string &value) { + ConstantKey key = make_string_constant_key(value); + ConstantMap::const_iterator it = constant_map.find(key); + if (it != constant_map.end()) { + return it->second; } - const std::string constant_key = make_constant_key(scalar_type, data); + SpvId result_id = make_id(SpvStringConstantId); + debug(3) << " declare_string_constant: %" << result_id << " string '" << value << "'\n"; + SpvInstruction inst = SpvFactory::string_constant(result_id, value); + module.add_constant(inst); + constant_map[key] = result_id; + return result_id; +} + +template +SpvId SpvBuilder::declare_scalar_constant_of_type(const Type &scalar_type, const T *data) { + + ConstantKey constant_key = make_constant_key(scalar_type, data); ConstantMap::const_iterator it = constant_map.find(constant_key); if (it != constant_map.end()) { return it->second; } - if (scalar_type.is_bool() && data) { - bool value = *reinterpret_cast(data); + SpvId result_id = SpvInvalidId; + SpvValueType value_type = SpvInvalidValueType; + if (scalar_type.is_bool()) { + const bool value = (reinterpret_cast(data)[0]); return declare_bool_constant(value); + } else if (scalar_type.is_float()) { + result_id = make_id(SpvFloatConstantId); + value_type = SpvFloatData; + } else if (scalar_type.is_int_or_uint()) { + result_id = make_id(SpvIntConstantId); + value_type = SpvIntegerData; + } else { + internal_error << "SPIRV: Unsupported type:" << scalar_type << "\n"; + return SpvInvalidId; } - debug(3) << "declare_scalar_constant for type " << scalar_type << "\n"; + T value = T(0); + assign_constant(&value, data); + SpvId type_id = add_type(scalar_type); + + debug(3) << " declare_scalar_constant_of_type: " + << "%" << result_id << " " + << "type=" << scalar_type << " " + << "data=" << stringify_constant(value) << "\n"; + + SpvInstruction inst = SpvFactory::constant(result_id, type_id, scalar_type.bytes(), &value, value_type); + module.add_constant(inst); + constant_map[constant_key] = result_id; + return result_id; +} + +template +SpvId SpvBuilder::declare_specialization_constant_of_type(const Type &scalar_type, const T *data) { SpvId result_id = SpvInvalidId; + SpvValueType value_type = SpvInvalidValueType; + // TODO: Add bools? 
 if (scalar_type.is_float()) {
-        result_id = declare_id(SpvFloatConstantId);
-    } else if (scalar_type.is_bool()) {
-        result_id = declare_id(SpvBoolConstantId);
+        result_id = make_id(SpvFloatConstantId);
+        value_type = SpvFloatData;
     } else if (scalar_type.is_int_or_uint()) {
-        result_id = declare_id(SpvIntConstantId);
+        result_id = make_id(SpvIntConstantId);
+        value_type = SpvIntegerData;
     } else {
-        internal_error << "SPIRV: Unsupported type:" << scalar_type << "\n";
+        internal_error << "SPIRV: Unsupported type for specialization constant: " << scalar_type << "\n";
         return SpvInvalidId;
     }
 
-    SpvId type_id = declare_type(scalar_type);
-    SpvInstruction inst = SpvFactory::constant(result_id, type_id, scalar_type.bytes(), data);
-    module.add_constant(inst);
-    constant_map[constant_key] = result_id;
+    T value = T(0);
+    assign_constant(&value, data);
+    SpvId type_id = add_type(scalar_type);
+
+    debug(3) << " declare_specialization_constant_of_type: "
+             << "%" << result_id << " "
+             << "type=" << scalar_type << " "
+             << "data=" << stringify_constant(value) << "\n";
+
+    SpvInstruction inst = SpvFactory::specialization_constant(result_id, type_id, scalar_type.bytes(), &value, value_type);
+    module.add_type(inst);  // NOTE: Needs to be declared in the type section in order to be used with other type definitions
     return result_id;
 }
 
+SpvId SpvBuilder::declare_integer_constant(const Type &type, int64_t value) {
+    if (!type.is_int() || !type.is_scalar()) {
+        internal_error << "SPIRV: Invalid type provided for integer constant!" << type << "\n";
+        return SpvInvalidId;
+    }
+
+    SpvId result_id = SpvInvalidId;
+    if (type.is_int() && type.bits() == 8) {
+        int8_t data(value);
+        result_id = declare_scalar_constant_of_type<int8_t>(type, &data);
+    } else if (type.is_int() && type.bits() == 16) {
+        int16_t data(value);
+        result_id = declare_scalar_constant_of_type<int16_t>(type, &data);
+    } else if (type.is_int() && type.bits() == 32) {
+        int32_t data(value);
+        result_id = declare_scalar_constant_of_type<int32_t>(type, &data);
+    } else if (type.is_int() && type.bits() == 64) {
+        int64_t data(value);
+        result_id = declare_scalar_constant_of_type<int64_t>(type, &data);
+    } else {
+        user_error << "Unhandled constant integer data conversion from value type '" << type << "'!\n";
+    }
+    return result_id;
+}
+
+SpvId SpvBuilder::declare_float_constant(const Type &type, double value) {
+    if (!type.is_float() || !type.is_scalar()) {
+        internal_error << "SPIRV: Invalid type provided for float constant!" << type << "\n";
+        return SpvInvalidId;
+    }
+
+    SpvId result_id = SpvInvalidId;
+    if (type.is_float() && type.bits() == 16) {
+        if (type.is_bfloat()) {
+            bfloat16_t data(value);
+            result_id = declare_scalar_constant_of_type<bfloat16_t>(type, &data);
+        } else {
+            float16_t data(value);
+            result_id = declare_scalar_constant_of_type<float16_t>(type, &data);
+        }
+    } else if (type.is_float() && type.bits() == 32) {
+        float data(value);
+        result_id = declare_scalar_constant_of_type<float>(type, &data);
+    } else if (type.is_float() && type.bits() == 64) {
+        double data(value);
+        result_id = declare_scalar_constant_of_type<double>(type, &data);
+    } else {
+        user_error << "Unhandled constant float data conversion from value type '" << type << "'!\n";
+    }
+    return result_id;
+}
+
+SpvId SpvBuilder::declare_scalar_constant(const Type &scalar_type, const void *data) {
+    if (scalar_type.lanes() != 1) {
+        internal_error << "SPIRV: Invalid type provided for scalar constant!" << scalar_type << "\n";
+        return SpvInvalidId;
+    }
+
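+    // The dispatch below reinterprets the raw byte pattern behind 'data' as
+    // the concrete C++ type that matches the Halide type, so the cache key and
+    // the emitted literal both see the correctly-typed value.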
<< scalar_type << "\n"; + return SpvInvalidId; + } + + ConstantKey constant_key = make_constant_key(scalar_type, data); + ConstantMap::const_iterator it = constant_map.find(constant_key); + if (it != constant_map.end()) { + return it->second; + } + + // TODO: Maybe add a templated Lambda to clean up this data conversion? + SpvId result_id = SpvInvalidId; + if (scalar_type.is_bool() && data) { + bool value = *reinterpret_cast(data); + return declare_bool_constant(value); + } else if (scalar_type.is_int() && scalar_type.bits() == 8) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_int() && scalar_type.bits() == 16) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_int() && scalar_type.bits() == 32) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_int() && scalar_type.bits() == 64) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_uint() && scalar_type.bits() == 8) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_uint() && scalar_type.bits() == 16) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_uint() && scalar_type.bits() == 32) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_uint() && scalar_type.bits() == 64) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_float() && scalar_type.bits() == 16) { + if (scalar_type.is_bfloat()) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } + } else if (scalar_type.is_float() && scalar_type.bits() == 32) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_float() && scalar_type.bits() == 64) { + result_id = declare_scalar_constant_of_type(scalar_type, reinterpret_cast(data)); + } else { + user_error << "Unhandled constant data conversion from value type '" << scalar_type << "'!\n"; + } + internal_assert(result_id != SpvInvalidId) << "Failed to declare scalar constant of type '" << scalar_type << "'!\n"; + return result_id; +} + +template +SpvBuilder::Components SpvBuilder::declare_constants_for_each_lane(Type type, const void *data) { + SpvBuilder::Components components; + components.reserve(type.lanes()); + + if (type.lanes() == 1) { + internal_error << "SPIRV: Invalid type provided for vector constant!" << type << "\n"; + return components; + } + + Type scalar_type = type.with_lanes(1); + const T *values = reinterpret_cast(data); + for (int c = 0; c < type.lanes(); c++) { + const T *entry = &(values[c]); + SpvId scalar_id = declare_scalar_constant(scalar_type, (const void *)entry); + components.push_back(scalar_id); + } + return components; +} + SpvId SpvBuilder::declare_vector_constant(const Type &type, const void *data) { if (type.lanes() == 1) { internal_error << "SPIRV: Invalid type provided for vector constant!" 
<< type << "\n"; return SpvInvalidId; } - const std::string key = make_constant_key(type, data); + ConstantKey key = make_constant_key(type, data); ConstantMap::const_iterator it = constant_map.find(key); if (it != constant_map.end()) { return it->second; } - Type scalar_type = type.with_lanes(1); - std::vector components(type.lanes()); - if (scalar_type.is_float()) { - if (type.bits() == 64) { - const double *values = (const double *)data; - for (int c = 0; c < type.lanes(); c++) { - const double *entry = &(values[c]); - SpvId scalar_id = declare_scalar_constant(scalar_type, (const void *)entry); - components.push_back(scalar_id); - } + SpvBuilder::Components components; + if (type.is_int() && type.bits() == 8) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_int() && type.bits() == 16) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_int() && type.bits() == 32) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_int() && type.bits() == 64) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_uint() && type.bits() == 8) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_uint() && type.bits() == 16) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_uint() && type.bits() == 32) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_uint() && type.bits() == 64) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_float() && type.bits() == 16) { + if (type.is_bfloat()) { + components = declare_constants_for_each_lane(type, data); } else { - const float *values = (const float *)data; - for (int c = 0; c < type.lanes(); c++) { - const float *entry = &(values[c]); - SpvId scalar_id = declare_scalar_constant(scalar_type, (const void *)entry); - components.push_back(scalar_id); - } - } - } else if (scalar_type.is_bool()) { - const bool *values = (const bool *)data; - for (int c = 0; c < type.lanes(); c++) { - const bool *entry = &(values[c]); - SpvId scalar_id = declare_scalar_constant(scalar_type, (const void *)entry); - components.push_back(scalar_id); - } - } else if (scalar_type.is_int_or_uint()) { - if (type.bits() == 64) { - const uint64_t *values = (const uint64_t *)data; - for (int c = 0; c < type.lanes(); c++) { - const uint64_t *entry = &(values[c]); - SpvId scalar_id = declare_scalar_constant(scalar_type, (const void *)entry); - components.push_back(scalar_id); - } - } else { - const uint32_t *values = (const uint32_t *)data; - for (int c = 0; c < type.lanes(); c++) { - const uint32_t *entry = &(values[c]); - SpvId scalar_id = declare_scalar_constant(scalar_type, (const void *)entry); - components.push_back(scalar_id); - } + components = declare_constants_for_each_lane(type, data); } + } else if (type.is_float() && type.bits() == 32) { + components = declare_constants_for_each_lane(type, data); + } else if (type.is_float() && type.bits() == 64) { + components = declare_constants_for_each_lane(type, data); } else { - internal_error << "SPIRV: Unsupported type:" << type << "\n"; - return SpvInvalidId; + user_error << "Unhandled constant data conversion from value type '" << type << "'!"; } - SpvId result_id = declare_id(SpvCompositeConstantId); - SpvId type_id = declare_type(type); + SpvId type_id = add_type(type); + SpvId result_id = make_id(SpvCompositeConstantId); + debug(3) << " declare_vector_constant: %" << result_id << " key=" << key 
<< " type=" << type << " data=" << data << "\n"; SpvInstruction inst = SpvFactory::composite_constant(result_id, type_id, components); module.add_constant(inst); constant_map[key] = result_id; return result_id; } -SpvId SpvBuilder::lookup_constant(const Type &type, const void *data) const { - ConstantKey key = make_constant_key(type, data); +SpvId SpvBuilder::declare_specialization_constant(const Type &scalar_type, const void *data) { + if (scalar_type.lanes() != 1) { + internal_error << "SPIRV: Invalid type provided for scalar constant!" << scalar_type << "\n"; + return SpvInvalidId; + } + + SpvId result_id = SpvInvalidId; + if (scalar_type.is_int() && scalar_type.bits() == 8) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_int() && scalar_type.bits() == 16) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_int() && scalar_type.bits() == 32) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_int() && scalar_type.bits() == 64) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_uint() && scalar_type.bits() == 8) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_uint() && scalar_type.bits() == 16) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_uint() && scalar_type.bits() == 32) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_uint() && scalar_type.bits() == 64) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_float() && scalar_type.bits() == 16) { + if (scalar_type.is_bfloat()) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } + } else if (scalar_type.is_float() && scalar_type.bits() == 32) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else if (scalar_type.is_float() && scalar_type.bits() == 64) { + result_id = declare_specialization_constant_of_type(scalar_type, reinterpret_cast(data)); + } else { + user_error << "Unhandled constant data conversion from value type '" << scalar_type << "'!\n"; + } + internal_assert(result_id != SpvInvalidId) << "Failed to declare specialization constant of type '" << scalar_type << "'!\n"; + return result_id; +} + +SpvId SpvBuilder::lookup_constant(const Type &type, const void *data, bool is_specialization) const { + ConstantKey key = make_constant_key(type, data, is_specialization); ConstantMap::const_iterator it = constant_map.find(key); if (it != constant_map.end()) { return it->second; @@ -1149,59 +2124,39 @@ SpvId SpvBuilder::lookup_constant(const Type &type, const void *data) const { return SpvInvalidId; } -SpvId SpvBuilder::declare_constant(const Type &type, const void *data) { +SpvId SpvBuilder::add_constant(const Type &type, const void *data, bool is_specialization) { - const std::string key = make_constant_key(type, data); + ConstantKey key = make_constant_key(type, data, is_specialization); ConstantMap::const_iterator it = constant_map.find(key); if (it != constant_map.end()) { 
return it->second; } - debug(3) << "declare_constant for type " << type << "\n"; - if (type.lanes() == 1) { + if (is_specialization) { + return declare_specialization_constant(type, data); + } else if (type.lanes() == 1) { return declare_scalar_constant(type, data); } else { return declare_vector_constant(type, data); } } -SpvId SpvBuilder::declare_access_chain(SpvId ptr_type_id, SpvId base_id, SpvId element_id, const Indices &indices) { - SpvId access_chain_id = declare_id(SpvAccessChainId); - append(SpvFactory::in_bounds_access_chain(ptr_type_id, access_chain_id, base_id, element_id, indices)); +SpvId SpvBuilder::declare_access_chain(SpvId ptr_type_id, SpvId base_id, const Indices &indices) { + SpvId access_chain_id = make_id(SpvAccessChainId); + append(SpvFactory::in_bounds_access_chain(ptr_type_id, access_chain_id, base_id, indices)); return access_chain_id; } -SpvId SpvBuilder::map_instruction(const SpvInstruction &inst) { - const SpvId key = inst.result_id(); - if (instruction_map.find(key) == instruction_map.end()) { - instruction_map.insert({key, inst}); - } else { - instruction_map[key] = inst; - } - return key; -} - -SpvInstruction SpvBuilder::lookup_instruction(SpvId result_id) const { - InstructionMap::const_iterator it = instruction_map.find(result_id); - if (it == instruction_map.end()) { - return SpvInstruction(); - } - return it->second; +SpvId SpvBuilder::declare_pointer_access_chain(SpvId ptr_type_id, SpvId base_id, SpvId element_id, const Indices &indices) { + SpvId access_chain_id = make_id(SpvAccessChainId); + append(SpvFactory::pointer_access_chain(ptr_type_id, access_chain_id, base_id, element_id, indices)); + return access_chain_id; } SpvBuilder::FunctionTypeKey SpvBuilder::make_function_type_key(SpvId return_type_id, const ParamTypes ¶m_type_ids) const { - TypeKey key((1 + param_type_ids.size()) * sizeof(SpvId), ' '); - - uint32_t index = 0; - for (size_t i = 0; i < sizeof(uint32_t); i++, index++) { - key[index] = (return_type_id & 0xff); - return_type_id >>= 8; - } + TypeKey key = hash_splitmix64(return_type_id); for (SpvId type_id : param_type_ids) { - for (size_t i = 0; i < sizeof(uint32_t); i++, index++) { - key[index] = (type_id & 0xff); - type_id >>= 8; - } + key = hash_combine(key, type_id); } return key; } @@ -1215,30 +2170,106 @@ SpvId SpvBuilder::lookup_function_type(SpvId return_type_id, const ParamTypes &p return SpvInvalidId; } -SpvId SpvBuilder::declare_function_type(SpvId return_type_id, const ParamTypes ¶m_type_ids) { +SpvId SpvBuilder::add_function_type(SpvId return_type_id, const ParamTypes ¶m_type_ids) { FunctionTypeKey func_type_key = make_function_type_key(return_type_id, param_type_ids); FunctionTypeMap::const_iterator it = function_type_map.find(func_type_key); if (it != function_type_map.end()) { return it->second; } - - SpvId function_type_id = declare_id(SpvFunctionTypeId); + SpvId function_type_id = make_id(SpvFunctionTypeId); + debug(3) << " add_function_type: %" << function_type_id << "\n" + << " return_type_id=" << return_type_id << "\n" + << " param_type_ids=["; + for (SpvId p : param_type_ids) { + debug(3) << " " << p; + } + debug(3) << " ]\n"; SpvInstruction inst = SpvFactory::function_type(function_type_id, return_type_id, param_type_ids); module.add_type(inst); function_type_map[func_type_key] = function_type_id; return function_type_id; } -SpvId SpvBuilder::declare_runtime_array(SpvId base_type_id) { - SpvId runtime_array_id = declare_id(SpvRuntimeArrayTypeId); +SpvId SpvBuilder::add_runtime_array(SpvId base_type_id) { + SpvId 
runtime_array_id = make_id(SpvRuntimeArrayTypeId); SpvInstruction inst = SpvFactory::runtime_array_type(runtime_array_id, base_type_id); module.add_type(inst); return runtime_array_id; } +SpvId SpvBuilder::add_array_with_default_size(SpvId base_type_id, SpvId array_size_id) { + SpvId array_id = make_id(SpvArrayTypeId); + SpvInstruction inst = SpvFactory::array_type(array_id, base_type_id, array_size_id); + module.add_type(inst); + return array_id; +} + +bool SpvBuilder::is_pointer_type(SpvId id) const { + BaseTypeMap::const_iterator it = base_type_map.find(id); + if (it != base_type_map.end()) { + return true; + } + return false; +} + +bool SpvBuilder::is_struct_type(SpvId id) const { + SpvKind kind = kind_of(id); + if (kind == SpvStructTypeId) { + return true; + } + return false; +} + +bool SpvBuilder::is_vector_type(SpvId id) const { + SpvKind kind = kind_of(id); + if (kind == SpvVectorTypeId) { + return true; + } + return false; +} + +bool SpvBuilder::is_scalar_type(SpvId id) const { + SpvKind kind = kind_of(id); + if ((kind == SpvFloatTypeId) || + (kind == SpvIntTypeId) || + (kind == SpvBoolTypeId)) { + return true; + } + return false; +} + +bool SpvBuilder::is_array_type(SpvId id) const { + SpvKind kind = kind_of(id); + if (kind == SpvArrayTypeId) { + return true; + } + return false; +} + +bool SpvBuilder::is_constant(SpvId id) const { + SpvKind kind = kind_of(id); + if ((kind == SpvConstantId) || + (kind == SpvBoolConstantId) || + (kind == SpvIntConstantId) || + (kind == SpvFloatConstantId) || + (kind == SpvStringConstantId) || + (kind == SpvCompositeConstantId)) { + return true; + } + return false; +} + +SpvId SpvBuilder::lookup_base_type(SpvId pointer_type) const { + BaseTypeMap::const_iterator it = base_type_map.find(pointer_type); + if (it != base_type_map.end()) { + return it->second; + } + return SpvInvalidId; +} + void SpvBuilder::append(SpvInstruction inst) { - if (!block_stack.empty()) { - current_block().add_instruction(std::move(inst)); + if (active_block.is_defined()) { + active_block.add_instruction(std::move(inst)); } else { internal_error << "SPIRV: Current block undefined! 
Unable to append!\n"; } @@ -1248,18 +2279,47 @@ void SpvBuilder::append(SpvInstruction inst) { // -- Factory Methods for Specific Instructions +SpvInstruction SpvFactory::no_op(SpvId result_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpNop); + return inst; +} + SpvInstruction SpvFactory::label(SpvId result_id) { SpvInstruction inst = SpvInstruction::make(SpvOpLabel); inst.set_result_id(result_id); return inst; } +SpvInstruction SpvFactory::debug_line(SpvId string_id, uint32_t line, uint32_t column) { + SpvInstruction inst = SpvInstruction::make(SpvOpLine); + inst.add_operand(string_id); + inst.add_immediates({ + {line, SpvIntegerLiteral}, + {column, SpvIntegerLiteral}, + }); + return inst; +} + +SpvInstruction SpvFactory::debug_string(SpvId result_id, const std::string &string) { + SpvInstruction inst = SpvInstruction::make(SpvOpString); + inst.set_result_id(result_id); + inst.add_string(string); + return inst; +} + +SpvInstruction SpvFactory::debug_symbol(SpvId target_id, const std::string &symbol) { + SpvInstruction inst = SpvInstruction::make(SpvOpName); + inst.set_result_id(target_id); + inst.add_string(symbol); + return inst; +} + SpvInstruction SpvFactory::decorate(SpvId target_id, SpvDecoration decoration_type, const SpvFactory::Literals &literals) { SpvInstruction inst = SpvInstruction::make(SpvOpDecorate); inst.add_operand(target_id); - inst.add_immediate(decoration_type); + inst.add_immediate(decoration_type, SpvIntegerLiteral); for (uint32_t l : literals) { - inst.add_immediate(l); + inst.add_immediate(l, SpvIntegerLiteral); } return inst; } @@ -1267,9 +2327,10 @@ SpvInstruction SpvFactory::decorate(SpvId target_id, SpvDecoration decoration_ty SpvInstruction SpvFactory::decorate_member(SpvId struct_type_id, uint32_t member_index, SpvDecoration decoration_type, const SpvFactory::Literals &literals) { SpvInstruction inst = SpvInstruction::make(SpvOpMemberDecorate); inst.add_operand(struct_type_id); - inst.add_immediate(decoration_type); + inst.add_immediates({{member_index, SpvIntegerLiteral}, + {decoration_type, SpvIntegerLiteral}}); for (uint32_t l : literals) { - inst.add_immediate(l); + inst.add_immediate(l, SpvIntegerLiteral); } return inst; } @@ -1286,8 +2347,7 @@ SpvInstruction SpvFactory::binary_op(SpvOp op_code, SpvId type_id, SpvId result_ SpvInstruction inst = SpvInstruction::make(op_code); inst.set_type_id(type_id); inst.set_result_id(result_id); - inst.add_operand(src_a_id); - inst.add_operand(src_b_id); + inst.add_operands({src_a_id, src_b_id}); return inst; } @@ -1314,15 +2374,15 @@ SpvInstruction SpvFactory::bool_type(SpvId bool_type_id) { SpvInstruction SpvFactory::integer_type(SpvId int_type_id, uint32_t bits, uint32_t signedness) { SpvInstruction inst = SpvInstruction::make(SpvOpTypeInt); inst.set_result_id(int_type_id); - inst.add_immediate(bits); - inst.add_immediate(signedness); + inst.add_immediates({{bits, SpvIntegerLiteral}, + {signedness, SpvIntegerLiteral}}); return inst; } SpvInstruction SpvFactory::float_type(SpvId float_type_id, uint32_t bits) { SpvInstruction inst = SpvInstruction::make(SpvOpTypeFloat); inst.set_result_id(float_type_id); - inst.add_immediate(bits); + inst.add_immediate(bits, SpvIntegerLiteral); return inst; } @@ -1330,24 +2390,21 @@ SpvInstruction SpvFactory::vector_type(SpvId vector_type_id, SpvId element_type_ SpvInstruction inst = SpvInstruction::make(SpvOpTypeVector); inst.set_result_id(vector_type_id); inst.add_operand(element_type_id); - inst.add_immediate(vector_size); + inst.add_immediate(vector_size, 
SpvIntegerLiteral); return inst; } -SpvInstruction SpvFactory::array_type(SpvId array_type_id, SpvId element_type_id, uint32_t array_size) { +SpvInstruction SpvFactory::array_type(SpvId array_type_id, SpvId element_type_id, SpvId array_size_id) { SpvInstruction inst = SpvInstruction::make(SpvOpTypeArray); inst.set_result_id(array_type_id); - inst.add_operand(element_type_id); - inst.add_immediate(array_size); + inst.add_operands({element_type_id, array_size_id}); return inst; } SpvInstruction SpvFactory::struct_type(SpvId result_id, const SpvFactory::MemberTypeIds &member_type_ids) { SpvInstruction inst = SpvInstruction::make(SpvOpTypeStruct); inst.set_result_id(result_id); - for (const SpvId member_type : member_type_ids) { - inst.add_operand(member_type); - } + inst.add_operands(member_type_ids); return inst; } @@ -1361,26 +2418,24 @@ SpvInstruction SpvFactory::runtime_array_type(SpvId result_type_id, SpvId base_t SpvInstruction SpvFactory::pointer_type(SpvId pointer_type_id, SpvStorageClass storage_class, SpvId base_type_id) { SpvInstruction inst = SpvInstruction::make(SpvOpTypePointer); inst.set_result_id(pointer_type_id); - inst.add_immediate(storage_class); + inst.add_immediate(storage_class, SpvIntegerLiteral); inst.add_operand(base_type_id); return inst; } SpvInstruction SpvFactory::function_type(SpvId function_type_id, SpvId return_type_id, const SpvFactory::ParamTypes ¶m_type_ids) { SpvInstruction inst = SpvInstruction::make(SpvOpTypeFunction); - inst.set_type_id(return_type_id); inst.set_result_id(function_type_id); - for (SpvId type_id : param_type_ids) { - inst.add_operand(type_id); - } + inst.add_operand(return_type_id); + inst.add_operands(param_type_ids); return inst; } -SpvInstruction SpvFactory::constant(SpvId result_id, SpvId type_id, size_t bytes, const void *data) { +SpvInstruction SpvFactory::constant(SpvId result_id, SpvId type_id, size_t bytes, const void *data, SpvValueType value_type) { SpvInstruction inst = SpvInstruction::make(SpvOpConstant); inst.set_type_id(type_id); inst.set_result_id(result_id); - inst.add_data(bytes, data); + inst.add_data(bytes, data, value_type); return inst; } @@ -1399,13 +2454,26 @@ SpvInstruction SpvFactory::bool_constant(SpvId result_id, SpvId type_id, bool va return inst; } +SpvInstruction SpvFactory::string_constant(SpvId result_id, const std::string &value) { + SpvInstruction inst = SpvInstruction::make(SpvOpString); + inst.set_result_id(result_id); + inst.add_string(value); + return inst; +} + SpvInstruction SpvFactory::composite_constant(SpvId result_id, SpvId type_id, const SpvFactory::Components &components) { SpvInstruction inst = SpvInstruction::make(SpvOpConstantComposite); inst.set_type_id(type_id); inst.set_result_id(result_id); - for (SpvId scalar_id : components) { - inst.add_operand(scalar_id); - } + inst.add_operands(components); + return inst; +} + +SpvInstruction SpvFactory::specialization_constant(SpvId result_id, SpvId type_id, size_t bytes, const void *data, SpvValueType value_type) { + SpvInstruction inst = SpvInstruction::make(SpvOpSpecConstant); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_data(bytes, data, value_type); return inst; } @@ -1413,7 +2481,7 @@ SpvInstruction SpvFactory::variable(SpvId result_id, SpvId result_type_id, uint3 SpvInstruction inst = SpvInstruction::make(SpvOpVariable); inst.set_type_id(result_type_id); inst.set_result_id(result_id); - inst.add_immediate(storage_class); + inst.add_immediate(storage_class, SpvIntegerLiteral); if (initializer_id != SpvInvalidId) 
{ inst.add_operand(initializer_id); } @@ -1424,7 +2492,7 @@ SpvInstruction SpvFactory::function(SpvId return_type_id, SpvId func_id, uint32_ SpvInstruction inst = SpvInstruction::make(SpvOpFunction); inst.set_type_id(return_type_id); inst.set_result_id(func_id); - inst.add_immediate(control_mask); + inst.add_immediate(control_mask, SpvBitMaskLiteral); inst.add_operand(func_type_id); return inst; } @@ -1452,44 +2520,88 @@ SpvInstruction SpvFactory::return_stmt(SpvId return_value_id) { SpvInstruction SpvFactory::entry_point(SpvId exec_model, SpvId func_id, const std::string &name, const SpvFactory::Variables &variables) { SpvInstruction inst = SpvInstruction::make(SpvOpEntryPoint); - inst.add_immediate(exec_model); + inst.add_immediate(exec_model, SpvIntegerLiteral); inst.add_operand(func_id); inst.add_string(name); - for (SpvId var : variables) { - inst.add_operand(var); - } + inst.add_operands(variables); return inst; } SpvInstruction SpvFactory::memory_model(SpvAddressingModel addressing_model, SpvMemoryModel memory_model) { SpvInstruction inst = SpvInstruction::make(SpvOpMemoryModel); - inst.add_immediate(addressing_model); - inst.add_immediate(memory_model); + inst.add_immediates({{addressing_model, SpvIntegerLiteral}, + {memory_model, SpvIntegerLiteral}}); return inst; } -SpvInstruction SpvFactory::exec_mode_local_size(SpvId function_id, uint32_t wg_size_x, uint32_t wg_size_y, uint32_t wg_size_z) { +SpvInstruction SpvFactory::exec_mode_local_size(SpvId function_id, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z) { SpvInstruction inst = SpvInstruction::make(SpvOpExecutionMode); inst.add_operand(function_id); - inst.add_immediate(SpvExecutionModeLocalSize); - inst.add_immediate(wg_size_x); - inst.add_immediate(wg_size_y); - inst.add_immediate(wg_size_z); + inst.add_immediates({ + {SpvExecutionModeLocalSize, SpvIntegerLiteral}, + {local_size_x, SpvIntegerLiteral}, + {local_size_y, SpvIntegerLiteral}, + {local_size_z, SpvIntegerLiteral}, + }); + return inst; +} + +SpvInstruction SpvFactory::exec_mode_local_size_id(SpvId function_id, SpvId local_size_x_id, SpvId local_size_y_id, SpvId local_size_z_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpExecutionModeId); + inst.add_operand(function_id); + inst.add_immediates({ + {SpvExecutionModeLocalSizeId, SpvIntegerLiteral}, + }); + inst.add_operands({local_size_x_id, + local_size_y_id, + local_size_z_id}); return inst; } -SpvInstruction SpvFactory::control_barrier(SpvId execution_scope_id, SpvId memory_scope_id, uint32_t semantics_mask) { +SpvInstruction SpvFactory::memory_barrier(SpvId memory_scope_id, SpvId semantics_mask_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpMemoryBarrier); + inst.add_operands({memory_scope_id, semantics_mask_id}); + return inst; +} + +SpvInstruction SpvFactory::control_barrier(SpvId execution_scope_id, SpvId memory_scope_id, SpvId semantics_mask_id) { SpvInstruction inst = SpvInstruction::make(SpvOpControlBarrier); - inst.add_operand(execution_scope_id); - inst.add_operand(memory_scope_id); - inst.add_immediate(semantics_mask); + inst.add_operands({execution_scope_id, memory_scope_id, semantics_mask_id}); return inst; } -SpvInstruction SpvFactory::logical_not(SpvId type_id, SpvId result_id, SpvId src_id) { +SpvInstruction SpvFactory::bitwise_not(SpvId type_id, SpvId result_id, SpvId src_id) { return unary_op(SpvOpNot, type_id, result_id, src_id); } +SpvInstruction SpvFactory::bitwise_and(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id) { + return 
binary_op(SpvOpBitwiseAnd, type_id, result_id, src_a_id, src_b_id); +} + +SpvInstruction SpvFactory::logical_not(SpvId type_id, SpvId result_id, SpvId src_id) { + return unary_op(SpvOpLogicalNot, type_id, result_id, src_id); +} + +SpvInstruction SpvFactory::logical_and(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id) { + return binary_op(SpvOpLogicalAnd, type_id, result_id, src_a_id, src_b_id); +} + +SpvInstruction SpvFactory::shift_right_logical(SpvId type_id, SpvId result_id, SpvId src_id, SpvId shift_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpShiftRightLogical); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({src_id, shift_id}); + return inst; +} + +SpvInstruction SpvFactory::shift_right_arithmetic(SpvId type_id, SpvId result_id, SpvId src_id, SpvId shift_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpShiftRightArithmetic); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({src_id, shift_id}); + return inst; +} + SpvInstruction SpvFactory::multiply_extended(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed) { return binary_op(is_signed ? SpvOpSMulExtended : SpvOpUMulExtended, type_id, result_id, src_a_id, src_b_id); } @@ -1498,21 +2610,25 @@ SpvInstruction SpvFactory::select(SpvId type_id, SpvId result_id, SpvId conditio SpvInstruction inst = SpvInstruction::make(SpvOpSelect); inst.set_type_id(type_id); inst.set_result_id(result_id); - inst.add_operand(condition_id); - inst.add_operand(true_id); - inst.add_operand(false_id); + inst.add_operands({condition_id, true_id, false_id}); return inst; } -SpvInstruction SpvFactory::in_bounds_access_chain(SpvId type_id, SpvId result_id, SpvId base_id, SpvId element_id, const SpvFactory::Indices &indices) { +SpvInstruction SpvFactory::in_bounds_access_chain(SpvId type_id, SpvId result_id, SpvId base_id, const SpvFactory::Indices &indices) { SpvInstruction inst = SpvInstruction::make(SpvOpInBoundsAccessChain); inst.set_type_id(type_id); inst.set_result_id(result_id); inst.add_operand(base_id); - inst.add_operand(element_id); - for (SpvId i : indices) { - inst.add_operand(i); - } + inst.add_operands(indices); + return inst; +} + +SpvInstruction SpvFactory::pointer_access_chain(SpvId type_id, SpvId result_id, SpvId base_id, SpvId element_id, const SpvFactory::Indices &indices) { + SpvInstruction inst = SpvInstruction::make(SpvOpPtrAccessChain); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({base_id, element_id}); + inst.add_operands(indices); return inst; } @@ -1521,15 +2637,25 @@ SpvInstruction SpvFactory::load(SpvId type_id, SpvId result_id, SpvId ptr_id, ui inst.set_type_id(type_id); inst.set_result_id(result_id); inst.add_operand(ptr_id); - inst.add_immediate(access_mask); + inst.add_immediate(access_mask, SpvBitMaskLiteral); return inst; } SpvInstruction SpvFactory::store(SpvId ptr_id, SpvId obj_id, uint32_t access_mask) { SpvInstruction inst = SpvInstruction::make(SpvOpStore); - inst.add_operand(ptr_id); - inst.add_operand(obj_id); - inst.add_immediate(access_mask); + inst.add_operands({ptr_id, obj_id}); + inst.add_immediate(access_mask, SpvBitMaskLiteral); + return inst; +} + +SpvInstruction SpvFactory::composite_insert(SpvId type_id, SpvId result_id, SpvId object_id, SpvId composite_id, const SpvFactory::Indices &indices) { + SpvInstruction inst = SpvInstruction::make(SpvOpCompositeInsert); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + 
inst.add_operands({object_id, composite_id}); + for (SpvId i : indices) { + inst.add_immediate(i, SpvIntegerLiteral); + } return inst; } @@ -1539,18 +2665,62 @@ SpvInstruction SpvFactory::composite_extract(SpvId type_id, SpvId result_id, Spv inst.set_result_id(result_id); inst.add_operand(composite_id); for (SpvId i : indices) { - inst.add_immediate(i); + inst.add_immediate(i, SpvIntegerLiteral); + } + return inst; +} + +SpvInstruction SpvFactory::composite_construct(SpvId type_id, SpvId result_id, const Components &constituents) { + SpvInstruction inst = SpvInstruction::make(SpvOpCompositeConstruct); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + for (SpvId id : constituents) { + inst.add_operand(id); } return inst; } -SpvInstruction SpvFactory::vector_insert_dynamic(SpvId result_id, SpvId vector_id, SpvId value_id, uint32_t index) { +SpvInstruction SpvFactory::vector_insert_dynamic(SpvId type_id, SpvId result_id, SpvId vector_id, SpvId value_id, SpvId index_id) { SpvInstruction inst = SpvInstruction::make(SpvOpVectorInsertDynamic); - inst.set_type_id(SpvOpTypeVector); + inst.set_type_id(type_id); inst.set_result_id(result_id); - inst.add_operand(vector_id); - inst.add_operand(value_id); - inst.add_immediate(index); + inst.add_operands({vector_id, value_id, index_id}); + return inst; +} + +SpvInstruction SpvFactory::vector_extract_dynamic(SpvId type_id, SpvId result_id, SpvId vector_id, SpvId value_id, SpvId index_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpVectorExtractDynamic); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({vector_id, value_id, index_id}); + return inst; +} + +SpvInstruction SpvFactory::vector_shuffle(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, const Indices &indices) { + SpvInstruction inst = SpvInstruction::make(SpvOpVectorShuffle); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operand(src_a_id); + inst.add_operand(src_b_id); + for (SpvId i : indices) { + inst.add_immediate(i, SpvIntegerLiteral); + } + return inst; +} + +SpvInstruction SpvFactory::is_inf(SpvId type_id, SpvId result_id, SpvId src_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpIsInf); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operand(src_id); + return inst; +} + +SpvInstruction SpvFactory::is_nan(SpvId type_id, SpvId result_id, SpvId src_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpIsNan); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operand(src_id); return inst; } @@ -1566,6 +2736,10 @@ SpvInstruction SpvFactory::integer_add(SpvId type_id, SpvId result_id, SpvId src return binary_op(SpvOpIAdd, type_id, result_id, src_a_id, src_b_id); } +SpvInstruction SpvFactory::float_add(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id) { + return binary_op(SpvOpFAdd, type_id, result_id, src_a_id, src_b_id); +} + SpvInstruction SpvFactory::branch(SpvId target_label_id) { SpvInstruction inst = SpvInstruction::make(SpvOpBranch); inst.add_operand(target_label_id); @@ -1573,28 +2747,74 @@ SpvInstruction SpvFactory::branch(SpvId target_label_id) { } SpvInstruction SpvFactory::conditional_branch(SpvId condition_label_id, SpvId true_label_id, SpvId false_label_id, const SpvFactory::BranchWeights &weights) { - SpvInstruction inst = SpvInstruction::make(SpvOpBranch); - inst.add_operand(condition_label_id); - inst.add_operand(true_label_id); - inst.add_operand(false_label_id); + SpvInstruction inst = 
SpvInstruction::make(SpvOpBranchConditional); + inst.add_operands({condition_label_id, true_label_id, false_label_id}); for (uint32_t w : weights) { - inst.add_immediate(w); + inst.add_immediate(w, SpvIntegerLiteral); } return inst; } +SpvInstruction SpvFactory::integer_equal(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpIEqual); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({src_a_id, src_b_id}); + return inst; +} + +SpvInstruction SpvFactory::integer_not_equal(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id) { + SpvInstruction inst = SpvInstruction::make(SpvOpINotEqual); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({src_a_id, src_b_id}); + return inst; +} + +SpvInstruction SpvFactory::integer_less_than(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed) { + SpvInstruction inst = SpvInstruction::make(is_signed ? SpvOpSLessThan : SpvOpULessThan); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({src_a_id, src_b_id}); + return inst; +} + +SpvInstruction SpvFactory::integer_less_than_equal(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed) { + SpvInstruction inst = SpvInstruction::make(is_signed ? SpvOpSLessThanEqual : SpvOpULessThanEqual); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({src_a_id, src_b_id}); + return inst; +} + +SpvInstruction SpvFactory::integer_greater_than(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed) { + SpvInstruction inst = SpvInstruction::make(is_signed ? SpvOpSGreaterThan : SpvOpUGreaterThan); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({src_a_id, src_b_id}); + return inst; +} + +SpvInstruction SpvFactory::integer_greater_than_equal(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed) { + SpvInstruction inst = SpvInstruction::make(is_signed ? 
SpvOpSGreaterThanEqual : SpvOpUGreaterThanEqual); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operands({src_a_id, src_b_id}); + return inst; +} + SpvInstruction SpvFactory::loop_merge(SpvId merge_label_id, SpvId continue_label_id, uint32_t loop_control_mask) { SpvInstruction inst = SpvInstruction::make(SpvOpLoopMerge); inst.add_operand(merge_label_id); inst.add_operand(continue_label_id); - inst.add_immediate(loop_control_mask); + inst.add_immediate(loop_control_mask, SpvBitMaskLiteral); return inst; } SpvInstruction SpvFactory::selection_merge(SpvId merge_label_id, uint32_t selection_control_mask) { SpvInstruction inst = SpvInstruction::make(SpvOpSelectionMerge); inst.add_operand(merge_label_id); - inst.add_immediate(selection_control_mask); + inst.add_immediate(selection_control_mask, SpvBitMaskLiteral); return inst; } @@ -1603,15 +2823,14 @@ SpvInstruction SpvFactory::phi(SpvId type_id, SpvId result_id, const SpvFactory: inst.set_type_id(type_id); inst.set_result_id(result_id); for (const SpvFactory::VariableBlockIdPair &vb : block_vars) { - inst.add_operand(vb.first); // variable id - inst.add_operand(vb.second); // block id + inst.add_operands({vb.first, vb.second}); // variable id, block id } return inst; } SpvInstruction SpvFactory::capability(const SpvCapability &capability) { SpvInstruction inst = SpvInstruction::make(SpvOpCapability); - inst.add_immediate(capability); + inst.add_immediate(capability, SpvIntegerLiteral); return inst; } @@ -1621,12 +2840,128 @@ SpvInstruction SpvFactory::extension(const std::string &extension) { return inst; } -SpvInstruction SpvFactory::import(const std::string &import) { +SpvInstruction SpvFactory::import(SpvId instruction_set_id, const std::string &instruction_set_name) { SpvInstruction inst = SpvInstruction::make(SpvOpExtInstImport); - inst.add_string(import); + inst.set_result_id(instruction_set_id); + inst.add_string(instruction_set_name); + return inst; +} + +SpvInstruction SpvFactory::extended(SpvId instruction_set_id, SpvId instruction_number, SpvId type_id, SpvId result_id, const SpvFactory::Operands &operands) { + SpvInstruction inst = SpvInstruction::make(SpvOpExtInst); + inst.set_type_id(type_id); + inst.set_result_id(result_id); + inst.add_operand(instruction_set_id); + inst.add_immediate(instruction_number, SpvIntegerLiteral); + inst.add_operands(operands); return inst; } +/** GLSL extended instruction utility methods */ + +bool is_glsl_unary_op(SpvId glsl_op_code) { + return (glsl_operand_count(glsl_op_code) == 1); +} + +bool is_glsl_binary_op(SpvId glsl_op_code) { + return (glsl_operand_count(glsl_op_code) == 2); +} + +uint32_t glsl_operand_count(SpvId glsl_op_code) { + switch (glsl_op_code) { + case GLSLstd450Round: + case GLSLstd450RoundEven: + case GLSLstd450Trunc: + case GLSLstd450FAbs: + case GLSLstd450SAbs: + case GLSLstd450FSign: + case GLSLstd450SSign: + case GLSLstd450Floor: + case GLSLstd450Ceil: + case GLSLstd450Fract: + case GLSLstd450Radians: + case GLSLstd450Degrees: + case GLSLstd450Sin: + case GLSLstd450Cos: + case GLSLstd450Tan: + case GLSLstd450Asin: + case GLSLstd450Acos: + case GLSLstd450Atan: + case GLSLstd450Asinh: + case GLSLstd450Acosh: + case GLSLstd450Atanh: + case GLSLstd450Cosh: + case GLSLstd450Sinh: + case GLSLstd450Tanh: + case GLSLstd450Exp: + case GLSLstd450Log: + case GLSLstd450Exp2: + case GLSLstd450Log2: + case GLSLstd450Sqrt: + case GLSLstd450InverseSqrt: + case GLSLstd450Determinant: + case GLSLstd450MatrixInverse: + case GLSLstd450ModfStruct: + case 
+
+/** GLSL extended instruction utility methods */
+
+bool is_glsl_unary_op(SpvId glsl_op_code) {
+    return (glsl_operand_count(glsl_op_code) == 1);
+}
+
+bool is_glsl_binary_op(SpvId glsl_op_code) {
+    return (glsl_operand_count(glsl_op_code) == 2);
+}
+
+uint32_t glsl_operand_count(SpvId glsl_op_code) {
+    switch (glsl_op_code) {
+    case GLSLstd450Round:
+    case GLSLstd450RoundEven:
+    case GLSLstd450Trunc:
+    case GLSLstd450FAbs:
+    case GLSLstd450SAbs:
+    case GLSLstd450FSign:
+    case GLSLstd450SSign:
+    case GLSLstd450Floor:
+    case GLSLstd450Ceil:
+    case GLSLstd450Fract:
+    case GLSLstd450Radians:
+    case GLSLstd450Degrees:
+    case GLSLstd450Sin:
+    case GLSLstd450Cos:
+    case GLSLstd450Tan:
+    case GLSLstd450Asin:
+    case GLSLstd450Acos:
+    case GLSLstd450Atan:
+    case GLSLstd450Asinh:
+    case GLSLstd450Acosh:
+    case GLSLstd450Atanh:
+    case GLSLstd450Cosh:
+    case GLSLstd450Sinh:
+    case GLSLstd450Tanh:
+    case GLSLstd450Exp:
+    case GLSLstd450Log:
+    case GLSLstd450Exp2:
+    case GLSLstd450Log2:
+    case GLSLstd450Sqrt:
+    case GLSLstd450InverseSqrt:
+    case GLSLstd450Determinant:
+    case GLSLstd450MatrixInverse:
+    case GLSLstd450ModfStruct:
+    case GLSLstd450FrexpStruct:
+    case GLSLstd450PackSnorm4x8:
+    case GLSLstd450PackUnorm4x8:
+    case GLSLstd450PackSnorm2x16:
+    case GLSLstd450PackUnorm2x16:
+    case GLSLstd450PackHalf2x16:
+    case GLSLstd450PackDouble2x32:
+    case GLSLstd450UnpackSnorm4x8:
+    case GLSLstd450UnpackUnorm4x8:
+    case GLSLstd450UnpackSnorm2x16:
+    case GLSLstd450UnpackUnorm2x16:
+    case GLSLstd450UnpackHalf2x16:
+    case GLSLstd450UnpackDouble2x32:
+    case GLSLstd450Length:
+    case GLSLstd450Normalize:
+    case GLSLstd450FindILsb:
+    case GLSLstd450FindSMsb:
+    case GLSLstd450FindUMsb:
+    case GLSLstd450InterpolateAtCentroid: {
+        return 1;  // unary op
+    }
+    case GLSLstd450Atan2:
+    case GLSLstd450Pow:
+    case GLSLstd450Modf:
+    case GLSLstd450FMin:
+    case GLSLstd450UMin:
+    case GLSLstd450SMin:
+    case GLSLstd450FMax:
+    case GLSLstd450UMax:
+    case GLSLstd450SMax:
+    case GLSLstd450Step:
+    case GLSLstd450Frexp:
+    case GLSLstd450Ldexp:
+    case GLSLstd450Distance:
+    case GLSLstd450Cross:
+    case GLSLstd450Reflect:
+    case GLSLstd450InterpolateAtOffset:
+    case GLSLstd450InterpolateAtSample:
+    case GLSLstd450NMax:
+    case GLSLstd450NMin: {
+        return 2;  // binary op
+    }
+    case GLSLstd450FMix:
+    case GLSLstd450IMix:
+    case GLSLstd450SmoothStep:
+    case GLSLstd450Fma:
+    case GLSLstd450FClamp:
+    case GLSLstd450UClamp:
+    case GLSLstd450SClamp:
+    case GLSLstd450NClamp: {
+        return 3;  // ternary op
+    }
+    case GLSLstd450Bad:
+    case GLSLstd450Count:
+    default:
+        break;
+    }
+    return SpvInvalidId;
+}
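The operand-count table above gives callers a cheap arity check before lowering an intrinsic onto the GLSL.std.450 set via SpvFactory::extended. A hedged sketch of the intended pairing (glsl_import_id, the type/result ids, and args are assumed for illustration):

    SpvFactory::Operands args = {src_id};
    internal_assert(glsl_operand_count(GLSLstd450Sqrt) == args.size());
    builder.append(SpvFactory::extended(glsl_import_id, GLSLstd450Sqrt, type_id, result_id, args));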
+
 /** Specializations for reference counted classes */
 template<>
 RefCount &ref_count(const SpvInstructionContents *c) noexcept {
@@ -1668,6 +3003,897 @@ void destroy(const SpvModuleContents *c) {
     delete c;
 }
 
+// --
+
+std::ostream &operator<<(std::ostream &stream, const SpvModule &module) {
+    if (!module.is_defined()) {
+        stream << "(undefined)";
+        return stream;
+    }
+
+    stream << "; SPIR-V\n";
+    stream << "; Version: " << std::to_string(spirv_major_version(module.version_format())) << "."
+           << std::to_string(spirv_minor_version(module.version_format())) << "\n";
+    stream << "; Generator: Khronos; 0\n";
+    stream << "; Bound: " << std::to_string(module.binding_count()) << "\n";
+    stream << "; Schema: 0\n";  // reserved for future use
+
+    SpvModule::Capabilities capabilities = module.capabilities();
+    if (!capabilities.empty()) {
+        stream << "\n";
+        stream << "; Capabilities\n";
+        for (const SpvCapability &value : capabilities) {
+            SpvInstruction inst = SpvFactory::capability(value);
+            stream << inst;
+        }
+    }
+
+    SpvModule::Extensions extensions = module.extensions();
+    if (!extensions.empty()) {
+        stream << "\n";
+        stream << "; Extensions\n";
+        for (const std::string &value : extensions) {
+            SpvInstruction inst = SpvFactory::extension(value);
+            stream << inst;
+        }
+    }
+
+    SpvModule::Imports imports = module.imports();
+    if (!imports.empty()) {
+        stream << "\n";
+        stream << "; Extended Instruction Set Imports\n";
+        for (const SpvModule::Imports::value_type &v : imports) {
+            SpvInstruction inst = SpvFactory::import(v.first, v.second);
+            stream << inst;
+        }
+    }
+
+    SpvInstruction memory_model = SpvFactory::memory_model(module.addressing_model(), module.memory_model());
+    stream << "\n";
+    stream << "; Memory Model\n";
+    stream << memory_model;
+
+    if (module.entry_point_count() > 0) {
+        stream << "\n";
+        stream << "; Entry Points\n";
+        SpvModule::EntryPointNames entry_point_names = module.entry_point_names();
+        for (const std::string &name : entry_point_names) {
+            SpvInstruction inst = module.entry_point(name);
+            stream << "; " << name << "\n";
+            stream << inst;
+        }
+    }
+
+    for (const SpvInstruction &inst : module.execution_modes()) {
+        stream << inst;
+    }
+
+    if (!module.debug_source().empty() || !module.debug_symbols().empty()) {
+        stream << "\n";
+        stream << "; Debug Information\n";
+    }
+    for (const SpvInstruction &inst : module.debug_source()) {
+        stream << inst;
+    }
+    for (const SpvInstruction &inst : module.debug_symbols()) {
+        stream << inst;
+    }
+
+    if (!module.annotations().empty()) {
+        stream << "\n";
+        stream << "; Annotations\n";
+        for (const SpvInstruction &inst : module.annotations()) {
+            stream << inst;
+        }
+    }
+
+    if (!module.type_definitions().empty()) {
+        stream << "\n";
+        stream << "; Type Definitions\n";
+        for (const SpvInstruction &inst : module.type_definitions()) {
+            stream << inst;
+        }
+    }
+
+    if (!module.global_constants().empty()) {
+        stream << "\n";
+        stream << "; Global Constants\n";
+        for (const SpvInstruction &inst : module.global_constants()) {
+            stream << inst;
+        }
+    }
+
+    if (!module.global_variables().empty()) {
+        stream << "\n";
+        stream << "; Global Variables\n";
+        for (const SpvInstruction &inst : module.global_variables()) {
+            stream << inst;
+        }
+    }
+
+    if (!module.function_definitions().empty()) {
+        stream << "\n";
+        stream << "; Function Definitions\n";
+        for (const SpvFunction &func : module.function_definitions()) {
+            stream << func;
+        }
+    }
+
+    return stream;
+}
+
+std::ostream &operator<<(std::ostream &stream, const SpvFunction &func) {
+    if (!func.is_defined()) {
+        stream << "(undefined)";
+        return stream;
+    }
+    stream << func.declaration();
+    for (const SpvInstruction &param : func.parameters()) {
+        stream << param;
+    }
+    for (const SpvBlock &block : func.blocks()) {
+        stream << block;
+    }
+    SpvInstruction inst = SpvFactory::function_end();
+    stream << inst;
+    return stream;
+}
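These stream operators are debugging aids: they print a pseudo-assembly listing in roughly the same section order the SPIR-V binary itself uses (capabilities, extensions, imports, memory model, entry points, debug info, annotations, types, constants, variables, functions). Dumping a module under construction is then just:

    std::cout << builder.current_module();  // assumes a SpvBuilder in scope, as in the test below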
+
+std::ostream &operator<<(std::ostream &stream, const SpvBlock &block) {
+    if (!block.is_defined()) {
+        stream << "(undefined)";
+        return stream;
+    }
+
+    SpvInstruction label = SpvFactory::label(block.id());
+    stream << label;
+    for (const SpvInstruction &variable : block.variables()) {
+        stream << variable;
+    }
+    for (const SpvInstruction &instruction : block.instructions()) {
+        stream << instruction;
+    }
+
+    return stream;
+}
+
+std::ostream &operator<<(std::ostream &stream, const SpvInstruction &inst) {
+    if (!inst.is_defined()) {
+        stream << "(undefined)";
+        return stream;
+    }
+
+    if (inst.has_result()) {
+        stream << std::string("%") << std::to_string(inst.result_id());
+        stream << " = ";
+    }
+
+    stream << spirv_op_name(inst.op_code());
+
+    if (inst.has_type()) {
+        stream << std::string(" %") << std::to_string(inst.type_id());
+    }
+
+    for (uint32_t i = 0; i < inst.length(); i++) {
+        if (inst.is_immediate(i)) {
+            if (inst.value_type(i) == SpvStringData) {
+                const char *str = (const char *)inst.data(i);
+                stream << std::string(" \"") << str << "\"";
+                break;
+            } else if (inst.value_type(i) == SpvIntegerData) {
+                const int *data = (const int *)inst.data(i);
+                stream << std::string(" ") << std::to_string(*data);
+                break;
+            } else if (inst.value_type(i) == SpvFloatData) {
+                const float *data = (const float *)inst.data(i);
+                stream << std::string(" ") << std::to_string(*data);
+                break;
+            } else if (inst.value_type(i) == SpvBitMaskLiteral) {
+                stream << std::string(" ") << std::hex << std::showbase << std::uppercase << inst.operand(i) << std::dec;
+            } else {
+                stream << std::string(" ") << std::to_string(inst.operand(i));
+            }
+        } else {
+            stream << std::string(" %") << std::to_string(inst.operand(i));
+        }
+    }
+
+    stream << "\n";
+    return stream;
+}
+
+// --
+
+namespace {
+
+/** Returns the name string for a given SPIR-V operand **/
+const std::string &spirv_op_name(SpvId op) {
+    using SpvOpNameMap = std::unordered_map<SpvId, std::string>;
+    static const SpvOpNameMap op_names = {
+        {SpvOpNop, "OpNop"},
+        {SpvOpUndef, "OpUndef"},
+        {SpvOpSourceContinued, "OpSourceContinued"},
+        {SpvOpSource, "OpSource"},
+        {SpvOpSourceExtension, "OpSourceExtension"},
+        {SpvOpName, "OpName"},
+        {SpvOpMemberName, "OpMemberName"},
+        {SpvOpString, "OpString"},
+        {SpvOpLine, "OpLine"},
+        {SpvOpExtension, "OpExtension"},
+        {SpvOpExtInstImport, "OpExtInstImport"},
+        {SpvOpExtInst, "OpExtInst"},
+        {SpvOpMemoryModel, "OpMemoryModel"},
+        {SpvOpEntryPoint, "OpEntryPoint"},
+        {SpvOpExecutionMode, "OpExecutionMode"},
+        {SpvOpCapability, "OpCapability"},
+        {SpvOpTypeVoid, "OpTypeVoid"},
+        {SpvOpTypeBool, "OpTypeBool"},
+        {SpvOpTypeInt, "OpTypeInt"},
+        {SpvOpTypeFloat, "OpTypeFloat"},
+        {SpvOpTypeVector, "OpTypeVector"},
+        {SpvOpTypeMatrix, "OpTypeMatrix"},
+        {SpvOpTypeImage, "OpTypeImage"},
+        {SpvOpTypeSampler, "OpTypeSampler"},
+        {SpvOpTypeSampledImage, "OpTypeSampledImage"},
+        {SpvOpTypeArray, "OpTypeArray"},
+        {SpvOpTypeRuntimeArray, "OpTypeRuntimeArray"},
+        {SpvOpTypeStruct, "OpTypeStruct"},
+        {SpvOpTypeOpaque, "OpTypeOpaque"},
+        {SpvOpTypePointer, "OpTypePointer"},
+        {SpvOpTypeFunction, "OpTypeFunction"},
+        {SpvOpTypeEvent, "OpTypeEvent"},
+        {SpvOpTypeDeviceEvent, "OpTypeDeviceEvent"},
+        {SpvOpTypeReserveId, "OpTypeReserveId"},
+        {SpvOpTypeQueue, "OpTypeQueue"},
+        {SpvOpTypePipe, "OpTypePipe"},
+        {SpvOpTypeForwardPointer, "OpTypeForwardPointer"},
+        {SpvOpConstantTrue, "OpConstantTrue"},
+        {SpvOpConstantFalse, "OpConstantFalse"},
+        {SpvOpConstant, "OpConstant"},
+        {SpvOpConstantComposite, "OpConstantComposite"},
+        {SpvOpConstantSampler, "OpConstantSampler"},
+        {SpvOpConstantNull, "OpConstantNull"},
+        {SpvOpSpecConstantTrue, "OpSpecConstantTrue"},
+        {SpvOpSpecConstantFalse, "OpSpecConstantFalse"},
"OpSpecConstantFalse"}, + {SpvOpSpecConstant, "OpSpecConstant"}, + {SpvOpSpecConstantComposite, "OpSpecConstantComposite"}, + {SpvOpSpecConstantOp, "OpSpecConstantOp"}, + {SpvOpFunction, "OpFunction"}, + {SpvOpFunctionParameter, "OpFunctionParameter"}, + {SpvOpFunctionEnd, "OpFunctionEnd"}, + {SpvOpFunctionCall, "OpFunctionCall"}, + {SpvOpVariable, "OpVariable"}, + {SpvOpImageTexelPointer, "OpImageTexelPointer"}, + {SpvOpLoad, "OpLoad"}, + {SpvOpStore, "OpStore"}, + {SpvOpCopyMemory, "OpCopyMemory"}, + {SpvOpCopyMemorySized, "OpCopyMemorySized"}, + {SpvOpAccessChain, "OpAccessChain"}, + {SpvOpInBoundsAccessChain, "OpInBoundsAccessChain"}, + {SpvOpPtrAccessChain, "OpPtrAccessChain"}, + {SpvOpArrayLength, "OpArrayLength"}, + {SpvOpGenericPtrMemSemantics, "OpGenericPtrMemSemantics"}, + {SpvOpInBoundsPtrAccessChain, "OpInBoundsPtrAccessChain"}, + {SpvOpDecorate, "OpDecorate"}, + {SpvOpMemberDecorate, "OpMemberDecorate"}, + {SpvOpDecorationGroup, "OpDecorationGroup"}, + {SpvOpGroupDecorate, "OpGroupDecorate"}, + {SpvOpGroupMemberDecorate, "OpGroupMemberDecorate"}, + {SpvOpVectorExtractDynamic, "OpVectorExtractDynamic"}, + {SpvOpVectorInsertDynamic, "OpVectorInsertDynamic"}, + {SpvOpVectorShuffle, "OpVectorShuffle"}, + {SpvOpCompositeConstruct, "OpCompositeConstruct"}, + {SpvOpCompositeExtract, "OpCompositeExtract"}, + {SpvOpCompositeInsert, "OpCompositeInsert"}, + {SpvOpCopyObject, "OpCopyObject"}, + {SpvOpTranspose, "OpTranspose"}, + {SpvOpSampledImage, "OpSampledImage"}, + {SpvOpImageSampleImplicitLod, "OpImageSampleImplicitLod"}, + {SpvOpImageSampleExplicitLod, "OpImageSampleExplicitLod"}, + {SpvOpImageSampleDrefImplicitLod, "OpImageSampleDrefImplicitLod"}, + {SpvOpImageSampleDrefExplicitLod, "OpImageSampleDrefExplicitLod"}, + {SpvOpImageSampleProjImplicitLod, "OpImageSampleProjImplicitLod"}, + {SpvOpImageSampleProjExplicitLod, "OpImageSampleProjExplicitLod"}, + {SpvOpImageSampleProjDrefImplicitLod, "OpImageSampleProjDrefImplicitLod"}, + {SpvOpImageSampleProjDrefExplicitLod, "OpImageSampleProjDrefExplicitLod"}, + {SpvOpImageFetch, "OpImageFetch"}, + {SpvOpImageGather, "OpImageGather"}, + {SpvOpImageDrefGather, "OpImageDrefGather"}, + {SpvOpImageRead, "OpImageRead"}, + {SpvOpImageWrite, "OpImageWrite"}, + {SpvOpImage, "OpImage"}, + {SpvOpImageQueryFormat, "OpImageQueryFormat"}, + {SpvOpImageQueryOrder, "OpImageQueryOrder"}, + {SpvOpImageQuerySizeLod, "OpImageQuerySizeLod"}, + {SpvOpImageQuerySize, "OpImageQuerySize"}, + {SpvOpImageQueryLod, "OpImageQueryLod"}, + {SpvOpImageQueryLevels, "OpImageQueryLevels"}, + {SpvOpImageQuerySamples, "OpImageQuerySamples"}, + {SpvOpConvertFToU, "OpConvertFToU"}, + {SpvOpConvertFToS, "OpConvertFToS"}, + {SpvOpConvertSToF, "OpConvertSToF"}, + {SpvOpConvertUToF, "OpConvertUToF"}, + {SpvOpUConvert, "OpUConvert"}, + {SpvOpSConvert, "OpSConvert"}, + {SpvOpFConvert, "OpFConvert"}, + {SpvOpQuantizeToF16, "OpQuantizeToF16"}, + {SpvOpConvertPtrToU, "OpConvertPtrToU"}, + {SpvOpSatConvertSToU, "OpSatConvertSToU"}, + {SpvOpSatConvertUToS, "OpSatConvertUToS"}, + {SpvOpConvertUToPtr, "OpConvertUToPtr"}, + {SpvOpPtrCastToGeneric, "OpPtrCastToGeneric"}, + {SpvOpGenericCastToPtr, "OpGenericCastToPtr"}, + {SpvOpGenericCastToPtrExplicit, "OpGenericCastToPtrExplicit"}, + {SpvOpBitcast, "OpBitcast"}, + {SpvOpSNegate, "OpSNegate"}, + {SpvOpFNegate, "OpFNegate"}, + {SpvOpIAdd, "OpIAdd"}, + {SpvOpFAdd, "OpFAdd"}, + {SpvOpISub, "OpISub"}, + {SpvOpFSub, "OpFSub"}, + {SpvOpIMul, "OpIMul"}, + {SpvOpFMul, "OpFMul"}, + {SpvOpUDiv, "OpUDiv"}, + {SpvOpSDiv, "OpSDiv"}, + {SpvOpFDiv, 
"OpFDiv"}, + {SpvOpUMod, "OpUMod"}, + {SpvOpSRem, "OpSRem"}, + {SpvOpSMod, "OpSMod"}, + {SpvOpFRem, "OpFRem"}, + {SpvOpFMod, "OpFMod"}, + {SpvOpVectorTimesScalar, "OpVectorTimesScalar"}, + {SpvOpMatrixTimesScalar, "OpMatrixTimesScalar"}, + {SpvOpVectorTimesMatrix, "OpVectorTimesMatrix"}, + {SpvOpMatrixTimesVector, "OpMatrixTimesVector"}, + {SpvOpMatrixTimesMatrix, "OpMatrixTimesMatrix"}, + {SpvOpOuterProduct, "OpOuterProduct"}, + {SpvOpDot, "OpDot"}, + {SpvOpIAddCarry, "OpIAddCarry"}, + {SpvOpISubBorrow, "OpISubBorrow"}, + {SpvOpUMulExtended, "OpUMulExtended"}, + {SpvOpSMulExtended, "OpSMulExtended"}, + {SpvOpAny, "OpAny"}, + {SpvOpAll, "OpAll"}, + {SpvOpIsNan, "OpIsNan"}, + {SpvOpIsInf, "OpIsInf"}, + {SpvOpIsFinite, "OpIsFinite"}, + {SpvOpIsNormal, "OpIsNormal"}, + {SpvOpSignBitSet, "OpSignBitSet"}, + {SpvOpLessOrGreater, "OpLessOrGreater"}, + {SpvOpOrdered, "OpOrdered"}, + {SpvOpUnordered, "OpUnordered"}, + {SpvOpLogicalEqual, "OpLogicalEqual"}, + {SpvOpLogicalNotEqual, "OpLogicalNotEqual"}, + {SpvOpLogicalOr, "OpLogicalOr"}, + {SpvOpLogicalAnd, "OpLogicalAnd"}, + {SpvOpLogicalNot, "OpLogicalNot"}, + {SpvOpSelect, "OpSelect"}, + {SpvOpIEqual, "OpIEqual"}, + {SpvOpINotEqual, "OpINotEqual"}, + {SpvOpUGreaterThan, "OpUGreaterThan"}, + {SpvOpSGreaterThan, "OpSGreaterThan"}, + {SpvOpUGreaterThanEqual, "OpUGreaterThanEqual"}, + {SpvOpSGreaterThanEqual, "OpSGreaterThanEqual"}, + {SpvOpULessThan, "OpULessThan"}, + {SpvOpSLessThan, "OpSLessThan"}, + {SpvOpULessThanEqual, "OpULessThanEqual"}, + {SpvOpSLessThanEqual, "OpSLessThanEqual"}, + {SpvOpFOrdEqual, "OpFOrdEqual"}, + {SpvOpFUnordEqual, "OpFUnordEqual"}, + {SpvOpFOrdNotEqual, "OpFOrdNotEqual"}, + {SpvOpFUnordNotEqual, "OpFUnordNotEqual"}, + {SpvOpFOrdLessThan, "OpFOrdLessThan"}, + {SpvOpFUnordLessThan, "OpFUnordLessThan"}, + {SpvOpFOrdGreaterThan, "OpFOrdGreaterThan"}, + {SpvOpFUnordGreaterThan, "OpFUnordGreaterThan"}, + {SpvOpFOrdLessThanEqual, "OpFOrdLessThanEqual"}, + {SpvOpFUnordLessThanEqual, "OpFUnordLessThanEqual"}, + {SpvOpFOrdGreaterThanEqual, "OpFOrdGreaterThanEqual"}, + {SpvOpFUnordGreaterThanEqual, "OpFUnordGreaterThanEqual"}, + {SpvOpShiftRightLogical, "OpShiftRightLogical"}, + {SpvOpShiftRightArithmetic, "OpShiftRightArithmetic"}, + {SpvOpShiftLeftLogical, "OpShiftLeftLogical"}, + {SpvOpBitwiseOr, "OpBitwiseOr"}, + {SpvOpBitwiseXor, "OpBitwiseXor"}, + {SpvOpBitwiseAnd, "OpBitwiseAnd"}, + {SpvOpNot, "OpNot"}, + {SpvOpBitFieldInsert, "OpBitFieldInsert"}, + {SpvOpBitFieldSExtract, "OpBitFieldSExtract"}, + {SpvOpBitFieldUExtract, "OpBitFieldUExtract"}, + {SpvOpBitReverse, "OpBitReverse"}, + {SpvOpBitCount, "OpBitCount"}, + {SpvOpDPdx, "OpDPdx"}, + {SpvOpDPdy, "OpDPdy"}, + {SpvOpFwidth, "OpFwidth"}, + {SpvOpDPdxFine, "OpDPdxFine"}, + {SpvOpDPdyFine, "OpDPdyFine"}, + {SpvOpFwidthFine, "OpFwidthFine"}, + {SpvOpDPdxCoarse, "OpDPdxCoarse"}, + {SpvOpDPdyCoarse, "OpDPdyCoarse"}, + {SpvOpFwidthCoarse, "OpFwidthCoarse"}, + {SpvOpEmitVertex, "OpEmitVertex"}, + {SpvOpEndPrimitive, "OpEndPrimitive"}, + {SpvOpEmitStreamVertex, "OpEmitStreamVertex"}, + {SpvOpEndStreamPrimitive, "OpEndStreamPrimitive"}, + {SpvOpControlBarrier, "OpControlBarrier"}, + {SpvOpMemoryBarrier, "OpMemoryBarrier"}, + {SpvOpAtomicLoad, "OpAtomicLoad"}, + {SpvOpAtomicStore, "OpAtomicStore"}, + {SpvOpAtomicExchange, "OpAtomicExchange"}, + {SpvOpAtomicCompareExchange, "OpAtomicCompareExchange"}, + {SpvOpAtomicCompareExchangeWeak, "OpAtomicCompareExchangeWeak"}, + {SpvOpAtomicIIncrement, "OpAtomicIIncrement"}, + {SpvOpAtomicIDecrement, "OpAtomicIDecrement"}, + {SpvOpAtomicIAdd, 
"OpAtomicIAdd"}, + {SpvOpAtomicISub, "OpAtomicISub"}, + {SpvOpAtomicSMin, "OpAtomicSMin"}, + {SpvOpAtomicUMin, "OpAtomicUMin"}, + {SpvOpAtomicSMax, "OpAtomicSMax"}, + {SpvOpAtomicUMax, "OpAtomicUMax"}, + {SpvOpAtomicAnd, "OpAtomicAnd"}, + {SpvOpAtomicOr, "OpAtomicOr"}, + {SpvOpAtomicXor, "OpAtomicXor"}, + {SpvOpPhi, "OpPhi"}, + {SpvOpLoopMerge, "OpLoopMerge"}, + {SpvOpSelectionMerge, "OpSelectionMerge"}, + {SpvOpLabel, "OpLabel"}, + {SpvOpBranch, "OpBranch"}, + {SpvOpBranchConditional, "OpBranchConditional"}, + {SpvOpSwitch, "OpSwitch"}, + {SpvOpKill, "OpKill"}, + {SpvOpReturn, "OpReturn"}, + {SpvOpReturnValue, "OpReturnValue"}, + {SpvOpUnreachable, "OpUnreachable"}, + {SpvOpLifetimeStart, "OpLifetimeStart"}, + {SpvOpLifetimeStop, "OpLifetimeStop"}, + {SpvOpGroupAsyncCopy, "OpGroupAsyncCopy"}, + {SpvOpGroupWaitEvents, "OpGroupWaitEvents"}, + {SpvOpGroupAll, "OpGroupAll"}, + {SpvOpGroupAny, "OpGroupAny"}, + {SpvOpGroupBroadcast, "OpGroupBroadcast"}, + {SpvOpGroupIAdd, "OpGroupIAdd"}, + {SpvOpGroupFAdd, "OpGroupFAdd"}, + {SpvOpGroupFMin, "OpGroupFMin"}, + {SpvOpGroupUMin, "OpGroupUMin"}, + {SpvOpGroupSMin, "OpGroupSMin"}, + {SpvOpGroupFMax, "OpGroupFMax"}, + {SpvOpGroupUMax, "OpGroupUMax"}, + {SpvOpGroupSMax, "OpGroupSMax"}, + {SpvOpReadPipe, "OpReadPipe"}, + {SpvOpWritePipe, "OpWritePipe"}, + {SpvOpReservedReadPipe, "OpReservedReadPipe"}, + {SpvOpReservedWritePipe, "OpReservedWritePipe"}, + {SpvOpReserveReadPipePackets, "OpReserveReadPipePackets"}, + {SpvOpReserveWritePipePackets, "OpReserveWritePipePackets"}, + {SpvOpCommitReadPipe, "OpCommitReadPipe"}, + {SpvOpCommitWritePipe, "OpCommitWritePipe"}, + {SpvOpIsValidReserveId, "OpIsValidReserveId"}, + {SpvOpGetNumPipePackets, "OpGetNumPipePackets"}, + {SpvOpGetMaxPipePackets, "OpGetMaxPipePackets"}, + {SpvOpGroupReserveReadPipePackets, "OpGroupReserveReadPipePackets"}, + {SpvOpGroupReserveWritePipePackets, "OpGroupReserveWritePipePackets"}, + {SpvOpGroupCommitReadPipe, "OpGroupCommitReadPipe"}, + {SpvOpGroupCommitWritePipe, "OpGroupCommitWritePipe"}, + {SpvOpEnqueueMarker, "OpEnqueueMarker"}, + {SpvOpEnqueueKernel, "OpEnqueueKernel"}, + {SpvOpGetKernelNDrangeSubGroupCount, "OpGetKernelNDrangeSubGroupCount"}, + {SpvOpGetKernelNDrangeMaxSubGroupSize, "OpGetKernelNDrangeMaxSubGroupSize"}, + {SpvOpGetKernelWorkGroupSize, "OpGetKernelWorkGroupSize"}, + {SpvOpGetKernelPreferredWorkGroupSizeMultiple, "OpGetKernelPreferredWorkGroupSizeMultiple"}, + {SpvOpRetainEvent, "OpRetainEvent"}, + {SpvOpReleaseEvent, "OpReleaseEvent"}, + {SpvOpCreateUserEvent, "OpCreateUserEvent"}, + {SpvOpIsValidEvent, "OpIsValidEvent"}, + {SpvOpSetUserEventStatus, "OpSetUserEventStatus"}, + {SpvOpCaptureEventProfilingInfo, "OpCaptureEventProfilingInfo"}, + {SpvOpGetDefaultQueue, "OpGetDefaultQueue"}, + {SpvOpBuildNDRange, "OpBuildNDRange"}, + {SpvOpImageSparseSampleImplicitLod, "OpImageSparseSampleImplicitLod"}, + {SpvOpImageSparseSampleExplicitLod, "OpImageSparseSampleExplicitLod"}, + {SpvOpImageSparseSampleDrefImplicitLod, "OpImageSparseSampleDrefImplicitLod"}, + {SpvOpImageSparseSampleDrefExplicitLod, "OpImageSparseSampleDrefExplicitLod"}, + {SpvOpImageSparseSampleProjImplicitLod, "OpImageSparseSampleProjImplicitLod"}, + {SpvOpImageSparseSampleProjExplicitLod, "OpImageSparseSampleProjExplicitLod"}, + {SpvOpImageSparseSampleProjDrefImplicitLod, "OpImageSparseSampleProjDrefImplicitLod"}, + {SpvOpImageSparseSampleProjDrefExplicitLod, "OpImageSparseSampleProjDrefExplicitLod"}, + {SpvOpImageSparseFetch, "OpImageSparseFetch"}, + {SpvOpImageSparseGather, "OpImageSparseGather"}, 
+ {SpvOpImageSparseDrefGather, "OpImageSparseDrefGather"}, + {SpvOpImageSparseTexelsResident, "OpImageSparseTexelsResident"}, + {SpvOpNoLine, "OpNoLine"}, + {SpvOpAtomicFlagTestAndSet, "OpAtomicFlagTestAndSet"}, + {SpvOpAtomicFlagClear, "OpAtomicFlagClear"}, + {SpvOpImageSparseRead, "OpImageSparseRead"}, + {SpvOpSizeOf, "OpSizeOf"}, + {SpvOpTypePipeStorage, "OpTypePipeStorage"}, + {SpvOpConstantPipeStorage, "OpConstantPipeStorage"}, + {SpvOpCreatePipeFromPipeStorage, "OpCreatePipeFromPipeStorage"}, + {SpvOpGetKernelLocalSizeForSubgroupCount, "OpGetKernelLocalSizeForSubgroupCount"}, + {SpvOpGetKernelMaxNumSubgroups, "OpGetKernelMaxNumSubgroups"}, + {SpvOpTypeNamedBarrier, "OpTypeNamedBarrier"}, + {SpvOpNamedBarrierInitialize, "OpNamedBarrierInitialize"}, + {SpvOpMemoryNamedBarrier, "OpMemoryNamedBarrier"}, + {SpvOpModuleProcessed, "OpModuleProcessed"}, + {SpvOpExecutionModeId, "OpExecutionModeId"}, + {SpvOpDecorateId, "OpDecorateId"}, + {SpvOpGroupNonUniformElect, "OpGroupNonUniformElect"}, + {SpvOpGroupNonUniformAll, "OpGroupNonUniformAll"}, + {SpvOpGroupNonUniformAny, "OpGroupNonUniformAny"}, + {SpvOpGroupNonUniformAllEqual, "OpGroupNonUniformAllEqual"}, + {SpvOpGroupNonUniformBroadcast, "OpGroupNonUniformBroadcast"}, + {SpvOpGroupNonUniformBroadcastFirst, "OpGroupNonUniformBroadcastFirst"}, + {SpvOpGroupNonUniformBallot, "OpGroupNonUniformBallot"}, + {SpvOpGroupNonUniformInverseBallot, "OpGroupNonUniformInverseBallot"}, + {SpvOpGroupNonUniformBallotBitExtract, "OpGroupNonUniformBallotBitExtract"}, + {SpvOpGroupNonUniformBallotBitCount, "OpGroupNonUniformBallotBitCount"}, + {SpvOpGroupNonUniformBallotFindLSB, "OpGroupNonUniformBallotFindLSB"}, + {SpvOpGroupNonUniformBallotFindMSB, "OpGroupNonUniformBallotFindMSB"}, + {SpvOpGroupNonUniformShuffle, "OpGroupNonUniformShuffle"}, + {SpvOpGroupNonUniformShuffleXor, "OpGroupNonUniformShuffleXor"}, + {SpvOpGroupNonUniformShuffleUp, "OpGroupNonUniformShuffleUp"}, + {SpvOpGroupNonUniformShuffleDown, "OpGroupNonUniformShuffleDown"}, + {SpvOpGroupNonUniformIAdd, "OpGroupNonUniformIAdd"}, + {SpvOpGroupNonUniformFAdd, "OpGroupNonUniformFAdd"}, + {SpvOpGroupNonUniformIMul, "OpGroupNonUniformIMul"}, + {SpvOpGroupNonUniformFMul, "OpGroupNonUniformFMul"}, + {SpvOpGroupNonUniformSMin, "OpGroupNonUniformSMin"}, + {SpvOpGroupNonUniformUMin, "OpGroupNonUniformUMin"}, + {SpvOpGroupNonUniformFMin, "OpGroupNonUniformFMin"}, + {SpvOpGroupNonUniformSMax, "OpGroupNonUniformSMax"}, + {SpvOpGroupNonUniformUMax, "OpGroupNonUniformUMax"}, + {SpvOpGroupNonUniformFMax, "OpGroupNonUniformFMax"}, + {SpvOpGroupNonUniformBitwiseAnd, "OpGroupNonUniformBitwiseAnd"}, + {SpvOpGroupNonUniformBitwiseOr, "OpGroupNonUniformBitwiseOr"}, + {SpvOpGroupNonUniformBitwiseXor, "OpGroupNonUniformBitwiseXor"}, + {SpvOpGroupNonUniformLogicalAnd, "OpGroupNonUniformLogicalAnd"}, + {SpvOpGroupNonUniformLogicalOr, "OpGroupNonUniformLogicalOr"}, + {SpvOpGroupNonUniformLogicalXor, "OpGroupNonUniformLogicalXor"}, + {SpvOpGroupNonUniformQuadBroadcast, "OpGroupNonUniformQuadBroadcast"}, + {SpvOpGroupNonUniformQuadSwap, "OpGroupNonUniformQuadSwap"}, + {SpvOpCopyLogical, "OpCopyLogical"}, + {SpvOpPtrEqual, "OpPtrEqual"}, + {SpvOpPtrNotEqual, "OpPtrNotEqual"}, + {SpvOpPtrDiff, "OpPtrDiff"}, + {SpvOpTerminateInvocation, "OpTerminateInvocation"}, + {SpvOpSubgroupBallotKHR, "OpSubgroupBallotKHR"}, + {SpvOpSubgroupFirstInvocationKHR, "OpSubgroupFirstInvocationKHR"}, + {SpvOpSubgroupAllKHR, "OpSubgroupAllKHR"}, + {SpvOpSubgroupAnyKHR, "OpSubgroupAnyKHR"}, + {SpvOpSubgroupAllEqualKHR, "OpSubgroupAllEqualKHR"}, 
+ {SpvOpGroupNonUniformRotateKHR, "OpGroupNonUniformRotateKHR"}, + {SpvOpSubgroupReadInvocationKHR, "OpSubgroupReadInvocationKHR"}, + {SpvOpTraceRayKHR, "OpTraceRayKHR"}, + {SpvOpExecuteCallableKHR, "OpExecuteCallableKHR"}, + {SpvOpConvertUToAccelerationStructureKHR, "OpConvertUToAccelerationStructureKHR"}, + {SpvOpIgnoreIntersectionKHR, "OpIgnoreIntersectionKHR"}, + {SpvOpTerminateRayKHR, "OpTerminateRayKHR"}, + {SpvOpSDot, "OpSDot"}, + {SpvOpSDotKHR, "OpSDotKHR"}, + {SpvOpUDot, "OpUDot"}, + {SpvOpUDotKHR, "OpUDotKHR"}, + {SpvOpSUDot, "OpSUDot"}, + {SpvOpSUDotKHR, "OpSUDotKHR"}, + {SpvOpSDotAccSat, "OpSDotAccSat"}, + {SpvOpSDotAccSatKHR, "OpSDotAccSatKHR"}, + {SpvOpUDotAccSat, "OpUDotAccSat"}, + {SpvOpUDotAccSatKHR, "OpUDotAccSatKHR"}, + {SpvOpSUDotAccSat, "OpSUDotAccSat"}, + {SpvOpSUDotAccSatKHR, "OpSUDotAccSatKHR"}, + {SpvOpTypeRayQueryKHR, "OpTypeRayQueryKHR"}, + {SpvOpRayQueryInitializeKHR, "OpRayQueryInitializeKHR"}, + {SpvOpRayQueryTerminateKHR, "OpRayQueryTerminateKHR"}, + {SpvOpRayQueryGenerateIntersectionKHR, "OpRayQueryGenerateIntersectionKHR"}, + {SpvOpRayQueryConfirmIntersectionKHR, "OpRayQueryConfirmIntersectionKHR"}, + {SpvOpRayQueryProceedKHR, "OpRayQueryProceedKHR"}, + {SpvOpRayQueryGetIntersectionTypeKHR, "OpRayQueryGetIntersectionTypeKHR"}, + {SpvOpGroupIAddNonUniformAMD, "OpGroupIAddNonUniformAMD"}, + {SpvOpGroupFAddNonUniformAMD, "OpGroupFAddNonUniformAMD"}, + {SpvOpGroupFMinNonUniformAMD, "OpGroupFMinNonUniformAMD"}, + {SpvOpGroupUMinNonUniformAMD, "OpGroupUMinNonUniformAMD"}, + {SpvOpGroupSMinNonUniformAMD, "OpGroupSMinNonUniformAMD"}, + {SpvOpGroupFMaxNonUniformAMD, "OpGroupFMaxNonUniformAMD"}, + {SpvOpGroupUMaxNonUniformAMD, "OpGroupUMaxNonUniformAMD"}, + {SpvOpGroupSMaxNonUniformAMD, "OpGroupSMaxNonUniformAMD"}, + {SpvOpFragmentMaskFetchAMD, "OpFragmentMaskFetchAMD"}, + {SpvOpFragmentFetchAMD, "OpFragmentFetchAMD"}, + {SpvOpReadClockKHR, "OpReadClockKHR"}, + {SpvOpImageSampleFootprintNV, "OpImageSampleFootprintNV"}, + {SpvOpEmitMeshTasksEXT, "OpEmitMeshTasksEXT"}, + {SpvOpSetMeshOutputsEXT, "OpSetMeshOutputsEXT"}, + {SpvOpGroupNonUniformPartitionNV, "OpGroupNonUniformPartitionNV"}, + {SpvOpWritePackedPrimitiveIndices4x8NV, "OpWritePackedPrimitiveIndices4x8NV"}, + {SpvOpReportIntersectionKHR, "OpReportIntersectionKHR"}, + {SpvOpReportIntersectionNV, "OpReportIntersectionNV"}, + {SpvOpIgnoreIntersectionNV, "OpIgnoreIntersectionNV"}, + {SpvOpTerminateRayNV, "OpTerminateRayNV"}, + {SpvOpTraceNV, "OpTraceNV"}, + {SpvOpTraceMotionNV, "OpTraceMotionNV"}, + {SpvOpTraceRayMotionNV, "OpTraceRayMotionNV"}, + {SpvOpTypeAccelerationStructureKHR, "OpTypeAccelerationStructureKHR"}, + {SpvOpTypeAccelerationStructureNV, "OpTypeAccelerationStructureNV"}, + {SpvOpExecuteCallableNV, "OpExecuteCallableNV"}, + {SpvOpTypeCooperativeMatrixNV, "OpTypeCooperativeMatrixNV"}, + {SpvOpCooperativeMatrixLoadNV, "OpCooperativeMatrixLoadNV"}, + {SpvOpCooperativeMatrixStoreNV, "OpCooperativeMatrixStoreNV"}, + {SpvOpCooperativeMatrixMulAddNV, "OpCooperativeMatrixMulAddNV"}, + {SpvOpCooperativeMatrixLengthNV, "OpCooperativeMatrixLengthNV"}, + {SpvOpBeginInvocationInterlockEXT, "OpBeginInvocationInterlockEXT"}, + {SpvOpEndInvocationInterlockEXT, "OpEndInvocationInterlockEXT"}, + {SpvOpDemoteToHelperInvocation, "OpDemoteToHelperInvocation"}, + {SpvOpDemoteToHelperInvocationEXT, "OpDemoteToHelperInvocationEXT"}, + {SpvOpIsHelperInvocationEXT, "OpIsHelperInvocationEXT"}, + {SpvOpConvertUToImageNV, "OpConvertUToImageNV"}, + {SpvOpConvertUToSamplerNV, "OpConvertUToSamplerNV"}, + {SpvOpConvertImageToUNV, 
"OpConvertImageToUNV"}, + {SpvOpConvertSamplerToUNV, "OpConvertSamplerToUNV"}, + {SpvOpConvertUToSampledImageNV, "OpConvertUToSampledImageNV"}, + {SpvOpConvertSampledImageToUNV, "OpConvertSampledImageToUNV"}, + {SpvOpSamplerImageAddressingModeNV, "OpSamplerImageAddressingModeNV"}, + {SpvOpSubgroupShuffleINTEL, "OpSubgroupShuffleINTEL"}, + {SpvOpSubgroupShuffleDownINTEL, "OpSubgroupShuffleDownINTEL"}, + {SpvOpSubgroupShuffleUpINTEL, "OpSubgroupShuffleUpINTEL"}, + {SpvOpSubgroupShuffleXorINTEL, "OpSubgroupShuffleXorINTEL"}, + {SpvOpSubgroupBlockReadINTEL, "OpSubgroupBlockReadINTEL"}, + {SpvOpSubgroupBlockWriteINTEL, "OpSubgroupBlockWriteINTEL"}, + {SpvOpSubgroupImageBlockReadINTEL, "OpSubgroupImageBlockReadINTEL"}, + {SpvOpSubgroupImageBlockWriteINTEL, "OpSubgroupImageBlockWriteINTEL"}, + {SpvOpSubgroupImageMediaBlockReadINTEL, "OpSubgroupImageMediaBlockReadINTEL"}, + {SpvOpSubgroupImageMediaBlockWriteINTEL, "OpSubgroupImageMediaBlockWriteINTEL"}, + {SpvOpUCountLeadingZerosINTEL, "OpUCountLeadingZerosINTEL"}, + {SpvOpUCountTrailingZerosINTEL, "OpUCountTrailingZerosINTEL"}, + {SpvOpAbsISubINTEL, "OpAbsISubINTEL"}, + {SpvOpAbsUSubINTEL, "OpAbsUSubINTEL"}, + {SpvOpIAddSatINTEL, "OpIAddSatINTEL"}, + {SpvOpUAddSatINTEL, "OpUAddSatINTEL"}, + {SpvOpIAverageINTEL, "OpIAverageINTEL"}, + {SpvOpUAverageINTEL, "OpUAverageINTEL"}, + {SpvOpIAverageRoundedINTEL, "OpIAverageRoundedINTEL"}, + {SpvOpUAverageRoundedINTEL, "OpUAverageRoundedINTEL"}, + {SpvOpISubSatINTEL, "OpISubSatINTEL"}, + {SpvOpUSubSatINTEL, "OpUSubSatINTEL"}, + {SpvOpIMul32x16INTEL, "OpIMul32x16INTEL"}, + {SpvOpUMul32x16INTEL, "OpUMul32x16INTEL"}, + {SpvOpConstantFunctionPointerINTEL, "OpConstantFunctionPointerINTEL"}, + {SpvOpFunctionPointerCallINTEL, "OpFunctionPointerCallINTEL"}, + {SpvOpAsmTargetINTEL, "OpAsmTargetINTEL"}, + {SpvOpAsmINTEL, "OpAsmINTEL"}, + {SpvOpAsmCallINTEL, "OpAsmCallINTEL"}, + {SpvOpAtomicFMinEXT, "OpAtomicFMinEXT"}, + {SpvOpAtomicFMaxEXT, "OpAtomicFMaxEXT"}, + {SpvOpAssumeTrueKHR, "OpAssumeTrueKHR"}, + {SpvOpExpectKHR, "OpExpectKHR"}, + {SpvOpDecorateString, "OpDecorateString"}, + {SpvOpDecorateStringGOOGLE, "OpDecorateStringGOOGLE"}, + {SpvOpMemberDecorateString, "OpMemberDecorateString"}, + {SpvOpMemberDecorateStringGOOGLE, "OpMemberDecorateStringGOOGLE"}, + {SpvOpVmeImageINTEL, "OpVmeImageINTEL"}, + {SpvOpTypeVmeImageINTEL, "OpTypeVmeImageINTEL"}, + {SpvOpTypeAvcImePayloadINTEL, "OpTypeAvcImePayloadINTEL"}, + {SpvOpTypeAvcRefPayloadINTEL, "OpTypeAvcRefPayloadINTEL"}, + {SpvOpTypeAvcSicPayloadINTEL, "OpTypeAvcSicPayloadINTEL"}, + {SpvOpTypeAvcMcePayloadINTEL, "OpTypeAvcMcePayloadINTEL"}, + {SpvOpTypeAvcMceResultINTEL, "OpTypeAvcMceResultINTEL"}, + {SpvOpTypeAvcImeResultINTEL, "OpTypeAvcImeResultINTEL"}, + {SpvOpTypeAvcImeResultSingleReferenceStreamoutINTEL, "OpTypeAvcImeResultSingleReferenceStreamoutINTEL"}, + {SpvOpTypeAvcImeResultDualReferenceStreamoutINTEL, "OpTypeAvcImeResultDualReferenceStreamoutINTEL"}, + {SpvOpTypeAvcImeSingleReferenceStreaminINTEL, "OpTypeAvcImeSingleReferenceStreaminINTEL"}, + {SpvOpTypeAvcImeDualReferenceStreaminINTEL, "OpTypeAvcImeDualReferenceStreaminINTEL"}, + {SpvOpTypeAvcRefResultINTEL, "OpTypeAvcRefResultINTEL"}, + {SpvOpTypeAvcSicResultINTEL, "OpTypeAvcSicResultINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL, "OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL"}, + {SpvOpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL, "OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL, 
"OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL"}, + {SpvOpSubgroupAvcMceSetInterShapePenaltyINTEL, "OpSubgroupAvcMceSetInterShapePenaltyINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL, "OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL"}, + {SpvOpSubgroupAvcMceSetInterDirectionPenaltyINTEL, "OpSubgroupAvcMceSetInterDirectionPenaltyINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL, "OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL, "OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL, "OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL, "OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL, "OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL"}, + {SpvOpSubgroupAvcMceSetMotionVectorCostFunctionINTEL, "OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL, "OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL, "OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL"}, + {SpvOpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL, "OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL"}, + {SpvOpSubgroupAvcMceSetAcOnlyHaarINTEL, "OpSubgroupAvcMceSetAcOnlyHaarINTEL"}, + {SpvOpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL, "OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL"}, + {SpvOpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL, "OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL"}, + {SpvOpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL, "OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL"}, + {SpvOpSubgroupAvcMceConvertToImePayloadINTEL, "OpSubgroupAvcMceConvertToImePayloadINTEL"}, + {SpvOpSubgroupAvcMceConvertToImeResultINTEL, "OpSubgroupAvcMceConvertToImeResultINTEL"}, + {SpvOpSubgroupAvcMceConvertToRefPayloadINTEL, "OpSubgroupAvcMceConvertToRefPayloadINTEL"}, + {SpvOpSubgroupAvcMceConvertToRefResultINTEL, "OpSubgroupAvcMceConvertToRefResultINTEL"}, + {SpvOpSubgroupAvcMceConvertToSicPayloadINTEL, "OpSubgroupAvcMceConvertToSicPayloadINTEL"}, + {SpvOpSubgroupAvcMceConvertToSicResultINTEL, "OpSubgroupAvcMceConvertToSicResultINTEL"}, + {SpvOpSubgroupAvcMceGetMotionVectorsINTEL, "OpSubgroupAvcMceGetMotionVectorsINTEL"}, + {SpvOpSubgroupAvcMceGetInterDistortionsINTEL, "OpSubgroupAvcMceGetInterDistortionsINTEL"}, + {SpvOpSubgroupAvcMceGetBestInterDistortionsINTEL, "OpSubgroupAvcMceGetBestInterDistortionsINTEL"}, + {SpvOpSubgroupAvcMceGetInterMajorShapeINTEL, "OpSubgroupAvcMceGetInterMajorShapeINTEL"}, + {SpvOpSubgroupAvcMceGetInterMinorShapeINTEL, "OpSubgroupAvcMceGetInterMinorShapeINTEL"}, + {SpvOpSubgroupAvcMceGetInterDirectionsINTEL, "OpSubgroupAvcMceGetInterDirectionsINTEL"}, + {SpvOpSubgroupAvcMceGetInterMotionVectorCountINTEL, "OpSubgroupAvcMceGetInterMotionVectorCountINTEL"}, + {SpvOpSubgroupAvcMceGetInterReferenceIdsINTEL, "OpSubgroupAvcMceGetInterReferenceIdsINTEL"}, + {SpvOpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL, "OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL"}, + {SpvOpSubgroupAvcImeInitializeINTEL, "OpSubgroupAvcImeInitializeINTEL"}, + {SpvOpSubgroupAvcImeSetSingleReferenceINTEL, "OpSubgroupAvcImeSetSingleReferenceINTEL"}, + 
{SpvOpSubgroupAvcImeSetDualReferenceINTEL, "OpSubgroupAvcImeSetDualReferenceINTEL"}, + {SpvOpSubgroupAvcImeRefWindowSizeINTEL, "OpSubgroupAvcImeRefWindowSizeINTEL"}, + {SpvOpSubgroupAvcImeAdjustRefOffsetINTEL, "OpSubgroupAvcImeAdjustRefOffsetINTEL"}, + {SpvOpSubgroupAvcImeConvertToMcePayloadINTEL, "OpSubgroupAvcImeConvertToMcePayloadINTEL"}, + {SpvOpSubgroupAvcImeSetMaxMotionVectorCountINTEL, "OpSubgroupAvcImeSetMaxMotionVectorCountINTEL"}, + {SpvOpSubgroupAvcImeSetUnidirectionalMixDisableINTEL, "OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL"}, + {SpvOpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL, "OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL"}, + {SpvOpSubgroupAvcImeSetWeightedSadINTEL, "OpSubgroupAvcImeSetWeightedSadINTEL"}, + {SpvOpSubgroupAvcImeEvaluateWithSingleReferenceINTEL, "OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL"}, + {SpvOpSubgroupAvcImeEvaluateWithDualReferenceINTEL, "OpSubgroupAvcImeEvaluateWithDualReferenceINTEL"}, + {SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL, "OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL"}, + {SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL, "OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL"}, + {SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL, "OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL"}, + {SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL, "OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL"}, + {SpvOpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL, "OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL"}, + {SpvOpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL, "OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL"}, + {SpvOpSubgroupAvcImeConvertToMceResultINTEL, "OpSubgroupAvcImeConvertToMceResultINTEL"}, + {SpvOpSubgroupAvcImeGetSingleReferenceStreaminINTEL, "OpSubgroupAvcImeGetSingleReferenceStreaminINTEL"}, + {SpvOpSubgroupAvcImeGetDualReferenceStreaminINTEL, "OpSubgroupAvcImeGetDualReferenceStreaminINTEL"}, + {SpvOpSubgroupAvcImeStripSingleReferenceStreamoutINTEL, "OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL"}, + {SpvOpSubgroupAvcImeStripDualReferenceStreamoutINTEL, "OpSubgroupAvcImeStripDualReferenceStreamoutINTEL"}, + {SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL, "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL"}, + {SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL, "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL"}, + {SpvOpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL, "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL"}, + {SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL, "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL"}, + {SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL, "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL"}, + {SpvOpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL, "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL"}, + {SpvOpSubgroupAvcImeGetBorderReachedINTEL, "OpSubgroupAvcImeGetBorderReachedINTEL"}, + {SpvOpSubgroupAvcImeGetTruncatedSearchIndicationINTEL, "OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL"}, + {SpvOpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL, "OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL"}, + {SpvOpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL, 
"OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL"}, + {SpvOpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL, "OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL"}, + {SpvOpSubgroupAvcFmeInitializeINTEL, "OpSubgroupAvcFmeInitializeINTEL"}, + {SpvOpSubgroupAvcBmeInitializeINTEL, "OpSubgroupAvcBmeInitializeINTEL"}, + {SpvOpSubgroupAvcRefConvertToMcePayloadINTEL, "OpSubgroupAvcRefConvertToMcePayloadINTEL"}, + {SpvOpSubgroupAvcRefSetBidirectionalMixDisableINTEL, "OpSubgroupAvcRefSetBidirectionalMixDisableINTEL"}, + {SpvOpSubgroupAvcRefSetBilinearFilterEnableINTEL, "OpSubgroupAvcRefSetBilinearFilterEnableINTEL"}, + {SpvOpSubgroupAvcRefEvaluateWithSingleReferenceINTEL, "OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL"}, + {SpvOpSubgroupAvcRefEvaluateWithDualReferenceINTEL, "OpSubgroupAvcRefEvaluateWithDualReferenceINTEL"}, + {SpvOpSubgroupAvcRefEvaluateWithMultiReferenceINTEL, "OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL"}, + {SpvOpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL, "OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL"}, + {SpvOpSubgroupAvcRefConvertToMceResultINTEL, "OpSubgroupAvcRefConvertToMceResultINTEL"}, + {SpvOpSubgroupAvcSicInitializeINTEL, "OpSubgroupAvcSicInitializeINTEL"}, + {SpvOpSubgroupAvcSicConfigureSkcINTEL, "OpSubgroupAvcSicConfigureSkcINTEL"}, + {SpvOpSubgroupAvcSicConfigureIpeLumaINTEL, "OpSubgroupAvcSicConfigureIpeLumaINTEL"}, + {SpvOpSubgroupAvcSicConfigureIpeLumaChromaINTEL, "OpSubgroupAvcSicConfigureIpeLumaChromaINTEL"}, + {SpvOpSubgroupAvcSicGetMotionVectorMaskINTEL, "OpSubgroupAvcSicGetMotionVectorMaskINTEL"}, + {SpvOpSubgroupAvcSicConvertToMcePayloadINTEL, "OpSubgroupAvcSicConvertToMcePayloadINTEL"}, + {SpvOpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL, "OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL"}, + {SpvOpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL, "OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL"}, + {SpvOpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL, "OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL"}, + {SpvOpSubgroupAvcSicSetBilinearFilterEnableINTEL, "OpSubgroupAvcSicSetBilinearFilterEnableINTEL"}, + {SpvOpSubgroupAvcSicSetSkcForwardTransformEnableINTEL, "OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL"}, + {SpvOpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL, "OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL"}, + {SpvOpSubgroupAvcSicEvaluateIpeINTEL, "OpSubgroupAvcSicEvaluateIpeINTEL"}, + {SpvOpSubgroupAvcSicEvaluateWithSingleReferenceINTEL, "OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL"}, + {SpvOpSubgroupAvcSicEvaluateWithDualReferenceINTEL, "OpSubgroupAvcSicEvaluateWithDualReferenceINTEL"}, + {SpvOpSubgroupAvcSicEvaluateWithMultiReferenceINTEL, "OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL"}, + {SpvOpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL, "OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL"}, + {SpvOpSubgroupAvcSicConvertToMceResultINTEL, "OpSubgroupAvcSicConvertToMceResultINTEL"}, + {SpvOpSubgroupAvcSicGetIpeLumaShapeINTEL, "OpSubgroupAvcSicGetIpeLumaShapeINTEL"}, + {SpvOpSubgroupAvcSicGetBestIpeLumaDistortionINTEL, "OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL"}, + {SpvOpSubgroupAvcSicGetBestIpeChromaDistortionINTEL, "OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL"}, + {SpvOpSubgroupAvcSicGetPackedIpeLumaModesINTEL, "OpSubgroupAvcSicGetPackedIpeLumaModesINTEL"}, + {SpvOpSubgroupAvcSicGetIpeChromaModeINTEL, "OpSubgroupAvcSicGetIpeChromaModeINTEL"}, + {SpvOpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL, 
"OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL"}, + {SpvOpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL, "OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL"}, + {SpvOpSubgroupAvcSicGetInterRawSadsINTEL, "OpSubgroupAvcSicGetInterRawSadsINTEL"}, + {SpvOpVariableLengthArrayINTEL, "OpVariableLengthArrayINTEL"}, + {SpvOpSaveMemoryINTEL, "OpSaveMemoryINTEL"}, + {SpvOpRestoreMemoryINTEL, "OpRestoreMemoryINTEL"}, + {SpvOpArbitraryFloatSinCosPiINTEL, "OpArbitraryFloatSinCosPiINTEL"}, + {SpvOpArbitraryFloatCastINTEL, "OpArbitraryFloatCastINTEL"}, + {SpvOpArbitraryFloatCastFromIntINTEL, "OpArbitraryFloatCastFromIntINTEL"}, + {SpvOpArbitraryFloatCastToIntINTEL, "OpArbitraryFloatCastToIntINTEL"}, + {SpvOpArbitraryFloatAddINTEL, "OpArbitraryFloatAddINTEL"}, + {SpvOpArbitraryFloatSubINTEL, "OpArbitraryFloatSubINTEL"}, + {SpvOpArbitraryFloatMulINTEL, "OpArbitraryFloatMulINTEL"}, + {SpvOpArbitraryFloatDivINTEL, "OpArbitraryFloatDivINTEL"}, + {SpvOpArbitraryFloatGTINTEL, "OpArbitraryFloatGTINTEL"}, + {SpvOpArbitraryFloatGEINTEL, "OpArbitraryFloatGEINTEL"}, + {SpvOpArbitraryFloatLTINTEL, "OpArbitraryFloatLTINTEL"}, + {SpvOpArbitraryFloatLEINTEL, "OpArbitraryFloatLEINTEL"}, + {SpvOpArbitraryFloatEQINTEL, "OpArbitraryFloatEQINTEL"}, + {SpvOpArbitraryFloatRecipINTEL, "OpArbitraryFloatRecipINTEL"}, + {SpvOpArbitraryFloatRSqrtINTEL, "OpArbitraryFloatRSqrtINTEL"}, + {SpvOpArbitraryFloatCbrtINTEL, "OpArbitraryFloatCbrtINTEL"}, + {SpvOpArbitraryFloatHypotINTEL, "OpArbitraryFloatHypotINTEL"}, + {SpvOpArbitraryFloatSqrtINTEL, "OpArbitraryFloatSqrtINTEL"}, + {SpvOpArbitraryFloatLogINTEL, "OpArbitraryFloatLogINTEL"}, + {SpvOpArbitraryFloatLog2INTEL, "OpArbitraryFloatLog2INTEL"}, + {SpvOpArbitraryFloatLog10INTEL, "OpArbitraryFloatLog10INTEL"}, + {SpvOpArbitraryFloatLog1pINTEL, "OpArbitraryFloatLog1pINTEL"}, + {SpvOpArbitraryFloatExpINTEL, "OpArbitraryFloatExpINTEL"}, + {SpvOpArbitraryFloatExp2INTEL, "OpArbitraryFloatExp2INTEL"}, + {SpvOpArbitraryFloatExp10INTEL, "OpArbitraryFloatExp10INTEL"}, + {SpvOpArbitraryFloatExpm1INTEL, "OpArbitraryFloatExpm1INTEL"}, + {SpvOpArbitraryFloatSinINTEL, "OpArbitraryFloatSinINTEL"}, + {SpvOpArbitraryFloatCosINTEL, "OpArbitraryFloatCosINTEL"}, + {SpvOpArbitraryFloatSinCosINTEL, "OpArbitraryFloatSinCosINTEL"}, + {SpvOpArbitraryFloatSinPiINTEL, "OpArbitraryFloatSinPiINTEL"}, + {SpvOpArbitraryFloatCosPiINTEL, "OpArbitraryFloatCosPiINTEL"}, + {SpvOpArbitraryFloatASinINTEL, "OpArbitraryFloatASinINTEL"}, + {SpvOpArbitraryFloatASinPiINTEL, "OpArbitraryFloatASinPiINTEL"}, + {SpvOpArbitraryFloatACosINTEL, "OpArbitraryFloatACosINTEL"}, + {SpvOpArbitraryFloatACosPiINTEL, "OpArbitraryFloatACosPiINTEL"}, + {SpvOpArbitraryFloatATanINTEL, "OpArbitraryFloatATanINTEL"}, + {SpvOpArbitraryFloatATanPiINTEL, "OpArbitraryFloatATanPiINTEL"}, + {SpvOpArbitraryFloatATan2INTEL, "OpArbitraryFloatATan2INTEL"}, + {SpvOpArbitraryFloatPowINTEL, "OpArbitraryFloatPowINTEL"}, + {SpvOpArbitraryFloatPowRINTEL, "OpArbitraryFloatPowRINTEL"}, + {SpvOpArbitraryFloatPowNINTEL, "OpArbitraryFloatPowNINTEL"}, + {SpvOpLoopControlINTEL, "OpLoopControlINTEL"}, + {SpvOpAliasDomainDeclINTEL, "OpAliasDomainDeclINTEL"}, + {SpvOpAliasScopeDeclINTEL, "OpAliasScopeDeclINTEL"}, + {SpvOpAliasScopeListDeclINTEL, "OpAliasScopeListDeclINTEL"}, + {SpvOpFixedSqrtINTEL, "OpFixedSqrtINTEL"}, + {SpvOpFixedRecipINTEL, "OpFixedRecipINTEL"}, + {SpvOpFixedRsqrtINTEL, "OpFixedRsqrtINTEL"}, + {SpvOpFixedSinINTEL, "OpFixedSinINTEL"}, + {SpvOpFixedCosINTEL, "OpFixedCosINTEL"}, + {SpvOpFixedSinCosINTEL, "OpFixedSinCosINTEL"}, + {SpvOpFixedSinPiINTEL, 
"OpFixedSinPiINTEL"}, + {SpvOpFixedCosPiINTEL, "OpFixedCosPiINTEL"}, + {SpvOpFixedSinCosPiINTEL, "OpFixedSinCosPiINTEL"}, + {SpvOpFixedLogINTEL, "OpFixedLogINTEL"}, + {SpvOpFixedExpINTEL, "OpFixedExpINTEL"}, + {SpvOpPtrCastToCrossWorkgroupINTEL, "OpPtrCastToCrossWorkgroupINTEL"}, + {SpvOpCrossWorkgroupCastToPtrINTEL, "OpCrossWorkgroupCastToPtrINTEL"}, + {SpvOpReadPipeBlockingINTEL, "OpReadPipeBlockingINTEL"}, + {SpvOpWritePipeBlockingINTEL, "OpWritePipeBlockingINTEL"}, + {SpvOpFPGARegINTEL, "OpFPGARegINTEL"}, + {SpvOpRayQueryGetRayTMinKHR, "OpRayQueryGetRayTMinKHR"}, + {SpvOpRayQueryGetRayFlagsKHR, "OpRayQueryGetRayFlagsKHR"}, + {SpvOpRayQueryGetIntersectionTKHR, "OpRayQueryGetIntersectionTKHR"}, + {SpvOpRayQueryGetIntersectionInstanceCustomIndexKHR, "OpRayQueryGetIntersectionInstanceCustomIndexKHR"}, + {SpvOpRayQueryGetIntersectionInstanceIdKHR, "OpRayQueryGetIntersectionInstanceIdKHR"}, + {SpvOpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR, "OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR"}, + {SpvOpRayQueryGetIntersectionGeometryIndexKHR, "OpRayQueryGetIntersectionGeometryIndexKHR"}, + {SpvOpRayQueryGetIntersectionPrimitiveIndexKHR, "OpRayQueryGetIntersectionPrimitiveIndexKHR"}, + {SpvOpRayQueryGetIntersectionBarycentricsKHR, "OpRayQueryGetIntersectionBarycentricsKHR"}, + {SpvOpRayQueryGetIntersectionFrontFaceKHR, "OpRayQueryGetIntersectionFrontFaceKHR"}, + {SpvOpRayQueryGetIntersectionCandidateAABBOpaqueKHR, "OpRayQueryGetIntersectionCandidateAABBOpaqueKHR"}, + {SpvOpRayQueryGetIntersectionObjectRayDirectionKHR, "OpRayQueryGetIntersectionObjectRayDirectionKHR"}, + {SpvOpRayQueryGetIntersectionObjectRayOriginKHR, "OpRayQueryGetIntersectionObjectRayOriginKHR"}, + {SpvOpRayQueryGetWorldRayDirectionKHR, "OpRayQueryGetWorldRayDirectionKHR"}, + {SpvOpRayQueryGetWorldRayOriginKHR, "OpRayQueryGetWorldRayOriginKHR"}, + {SpvOpRayQueryGetIntersectionObjectToWorldKHR, "OpRayQueryGetIntersectionObjectToWorldKHR"}, + {SpvOpRayQueryGetIntersectionWorldToObjectKHR, "OpRayQueryGetIntersectionWorldToObjectKHR"}, + {SpvOpAtomicFAddEXT, "OpAtomicFAddEXT"}, + {SpvOpTypeBufferSurfaceINTEL, "OpTypeBufferSurfaceINTEL"}, + {SpvOpTypeStructContinuedINTEL, "OpTypeStructContinuedINTEL"}, + {SpvOpConstantCompositeContinuedINTEL, "OpConstantCompositeContinuedINTEL"}, + {SpvOpSpecConstantCompositeContinuedINTEL, "OpSpecConstantCompositeContinuedINTEL"}, + {SpvOpControlBarrierArriveINTEL, "OpControlBarrierArriveINTEL"}, + {SpvOpControlBarrierWaitINTEL, "OpControlBarrierWaitINTEL"}, + {SpvOpGroupIMulKHR, "OpGroupIMulKHR"}, + {SpvOpGroupFMulKHR, "OpGroupFMulKHR"}, + {SpvOpGroupBitwiseAndKHR, "OpGroupBitwiseAndKHR"}, + {SpvOpGroupBitwiseOrKHR, "OpGroupBitwiseOrKHR"}, + {SpvOpGroupBitwiseXorKHR, "OpGroupBitwiseXorKHR"}, + {SpvOpGroupLogicalAndKHR, "OpGroupLogicalAndKHR"}, + {SpvOpGroupLogicalOrKHR, "OpGroupLogicalOrKHR"}, + {SpvOpGroupLogicalXorKHR, "OpGroupLogicalXorKHR"}, + }; + + SpvOpNameMap::const_iterator entry = op_names.find(op); + if (entry != op_names.end()) { + return entry->second; + } + static const std::string invalid_op_name("*INVALID*"); + return invalid_op_name; +} + +// -- + +} // namespace } // namespace Internal } // namespace Halide @@ -1691,24 +3917,23 @@ void spirv_ir_test() { SpvInstruction void_inst = SpvFactory::void_type(void_type_id); builder.current_module().add_type(void_inst); - SpvId int_type_id = builder.map_type(Int(32)); - SpvId uint_type_id = builder.map_type(UInt(32)); - SpvId float_type_id = builder.map_type(Float(32)); + SpvId int_type_id = 
@@ -1691,24 +3917,23 @@ void spirv_ir_test() {
     SpvInstruction void_inst = SpvFactory::void_type(void_type_id);
     builder.current_module().add_type(void_inst);
 
-    SpvId int_type_id = builder.map_type(Int(32));
-    SpvId uint_type_id = builder.map_type(UInt(32));
-    SpvId float_type_id = builder.map_type(Float(32));
+    SpvId int_type_id = builder.declare_type(Int(32));
+    SpvId uint_type_id = builder.declare_type(UInt(32));
+    SpvId float_type_id = builder.declare_type(Float(32));
     SpvBuilder::ParamTypes param_types = {int_type_id, uint_type_id, float_type_id};
-    SpvFunction function = builder.add_function(void_type_id, param_types);
+    SpvId kernel_func_id = builder.add_function("kernel_func", void_type_id, param_types);
+    SpvFunction kernel_func = builder.lookup_function(kernel_func_id);
 
-    builder.enter_function(function);
-    SpvId intrinsic_type_id = builder.map_type(Type(Type::UInt, 32, 3));
-    SpvId intrinsic_id = builder.add_global_variable(intrinsic_type_id, SpvStorageClassInput);
+    builder.enter_function(kernel_func);
+    SpvId intrinsic_type_id = builder.declare_type(Type(Type::UInt, 32, 3));
+    SpvId intrinsic_id = builder.declare_global_variable("InputVar", intrinsic_type_id, SpvStorageClassInput);
 
-    SpvId output_type_id = builder.map_type(Type(Type::UInt, 32, 1));
-    SpvId output_id = builder.add_global_variable(output_type_id, SpvStorageClassOutput);
+    SpvId output_type_id = builder.declare_type(Type(Type::UInt, 32, 1));
+    SpvId output_id = builder.declare_global_variable("OutputVar", output_type_id, SpvStorageClassOutput);
 
-    SpvBuilder::Variables entry_point_variables;
-    entry_point_variables.push_back(intrinsic_id);
-    entry_point_variables.push_back(output_id);
-    builder.add_entry_point("entry_func", function.id(), SpvExecutionModelKernel, entry_point_variables);
+    SpvBuilder::Variables entry_point_variables = {intrinsic_id, output_id};
+    builder.add_entry_point(kernel_func_id, SpvExecutionModelKernel, entry_point_variables);
 
     SpvBuilder::Literals annotation_literals = {SpvBuiltInWorkgroupId};
     builder.add_annotation(intrinsic_id, SpvDecorationBuiltIn, annotation_literals);
@@ -1717,7 +3942,7 @@ void spirv_ir_test() {
     builder.append(SpvFactory::load(intrinsic_type_id, intrinsic_loaded_id, intrinsic_id));
 
     float float_value = 32.0f;
-    SpvId float_src_id = builder.declare_constant(Float(32), &float_value);
+    SpvId float_src_id = builder.add_constant(Float(32), &float_value);
     SpvId converted_value_id = builder.reserve_id(SpvResultId);
     builder.append(SpvFactory::convert(SpvOpConvertFToU, uint_type_id, converted_value_id, float_src_id));
     builder.append(SpvFactory::store(output_id, converted_value_id));
diff --git a/src/SpirvIR.h b/src/SpirvIR.h
index 0c3356162820..d92eaa696bd7 100644
--- a/src/SpirvIR.h
+++ b/src/SpirvIR.h
@@ -23,7 +23,8 @@
 #include "IntrusivePtr.h"
 #include "Type.h"
 
-#include <spirv/1.0/spirv.h>              // Use v1.0 spec as the minimal viable version (for maximum compatiblity)
+#include <spirv/unified1/GLSL.std.450.h>  // GLSL extended instructions for common intrinsics
+#include <spirv/unified1/spirv.h>         // Use v1.6 headers but only use the minimal viable format version (for maximum compatibility)
 
 namespace Halide {
 namespace Internal {
@@ -34,6 +35,15 @@ enum SpvPrecision {
     SpvRelaxedPrecision,
 };
 
+/** Scope qualifiers for Execution & Memory operations */
+enum SpvScope {
+    SpvCrossDeviceScope = 0,
+    SpvDeviceScope = 1,
+    SpvWorkgroupScope = 2,
+    SpvSubgroupScope = 3,
+    SpvInvocationScope = 4
+};
+
 /** Specific types of predefined constants */
 enum SpvPredefinedConstant {
     SpvNullConstant,
@@ -48,6 +58,7 @@ enum SpvKind {
     SpvVoidTypeId,
     SpvBoolTypeId,
     SpvIntTypeId,
+    SpvUIntTypeId,
     SpvFloatTypeId,
     SpvVectorTypeId,
     SpvArrayTypeId,
@@ -70,14 +81,28 @@ enum SpvKind {
     SpvBlockId,
     SpvLabelId,
     SpvParameterId,
+    SpvImportId,
     SpvModuleId,
     SpvUnknownItem,
 };
 
+/** Specific types of SPIR-V operand types */
+enum SpvValueType {
+    SpvInvalidValueType,
+    SpvOperandId,
+    SpvBitMaskLiteral,
+    SpvIntegerLiteral,
+    SpvIntegerData,
+    SpvFloatData,
+    SpvStringData,
+    SpvUnknownValueType
+};
+
 /** SPIR-V requires all IDs to be 32-bit unsigned integers */
 using SpvId = uint32_t;
 using SpvBinary = std::vector<uint32_t>;
 
+static constexpr SpvStorageClass SpvInvalidStorageClass = SpvStorageClassMax;  // sentinel for invalid storage class
 static constexpr SpvId SpvInvalidId = SpvId(-1);
 static constexpr SpvId SpvNoResult = 0;
 static constexpr SpvId SpvNoType = 0;
@@ -88,6 +113,7 @@ class SpvFunction;
 class SpvBlock;
 class SpvInstruction;
 class SpvBuilder;
+struct SpvFactory;
 
 /** Pre-declarations for SPIR-V IR data structures */
 struct SpvModuleContents;
@@ -104,6 +130,11 @@ using SpvInstructionContentsPtr = IntrusivePtr<SpvInstructionContents>;
 /** General interface for representing a SPIR-V Instruction */
 class SpvInstruction {
 public:
+    using LiteralValue = std::pair<uint32_t, SpvValueType>;
+    using Immediates = std::vector<LiteralValue>;
+    using Operands = std::vector<SpvId>;
+    using ValueTypes = std::vector<SpvValueType>;
+
     SpvInstruction() = default;
     ~SpvInstruction() = default;
 
@@ -117,14 +148,22 @@ class SpvInstruction {
     void set_type_id(SpvId id);
     void set_op_code(SpvOp opcode);
     void add_operand(SpvId id);
-    void add_immediate(SpvId id);
-    void add_data(uint32_t bytes, const void *data);
+    void add_operands(const Operands &operands);
+    void add_immediate(SpvId id, SpvValueType type);
+    void add_immediates(const Immediates &immediates);
+    void add_data(uint32_t bytes, const void *data, SpvValueType type);
     void add_string(const std::string &str);
 
+    template<typename T>
+    void append(const T &operands_or_immediates_or_strings);
+
     SpvId result_id() const;
     SpvId type_id() const;
     SpvOp op_code() const;
-    SpvId operand(uint32_t index);
+    SpvId operand(uint32_t index) const;
+    const void *data(uint32_t index = 0) const;
+    SpvValueType value_type(uint32_t index) const;
+    const Operands &operands() const;
 
     bool has_type() const;
     bool has_result() const;
@@ -180,6 +219,9 @@ class SpvBlock {
 /** General interface for representing a SPIR-V Function */
 class SpvFunction {
 public:
+    using Blocks = std::vector<SpvBlock>;
+    using Parameters = std::vector<SpvInstruction>;
+
     SpvFunction() = default;
     ~SpvFunction() = default;
 
@@ -188,6 +230,7 @@ class SpvFunction {
     SpvFunction(SpvFunction &&) = default;
     SpvFunction &operator=(SpvFunction &&) = default;
 
+    SpvBlock create_block(SpvId block_id);
     void add_block(const SpvBlock &block);
     void add_parameter(const SpvInstruction &param);
     void set_module(SpvModule module);
@@ -195,8 +238,11 @@ class SpvFunction {
     void set_parameter_precision(uint32_t index, SpvPrecision precision);
     bool is_defined() const;
 
+    const Blocks &blocks() const;
     SpvBlock entry_block() const;
+    SpvBlock tail_block() const;
     SpvPrecision return_precision() const;
+    const Parameters &parameters() const;
     SpvPrecision parameter_precision(uint32_t index) const;
     uint32_t parameter_count() const;
     uint32_t control_mask() const;
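The SpvValueType tag carried by each immediate is what lets the binary encoder and the disassembler agree on how a raw 32-bit word should be interpreted. For example, loop_merge (earlier in this patch) tags its control mask as SpvBitMaskLiteral, so the operator<< implementation prints it in hex (the ids and mask here are illustrative):

    SpvInstruction inst = SpvFactory::loop_merge(merge_id, continue_id, SpvLoopControlDontUnrollMask);
    // Prints as, e.g.:  OpLoopMerge %10 %11 0X2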
@@ -218,8 +264,14 @@ class SpvFunction {
 
 /** General interface for representing a SPIR-V code module */
 class SpvModule {
 public:
+    using ImportDefinition = std::pair<SpvId, std::string>;
+    using ImportNames = std::vector<std::string>;
     using EntryPointNames = std::vector<std::string>;
     using Instructions = std::vector<SpvInstruction>;
+    using Functions = std::vector<SpvFunction>;
+    using Capabilities = std::vector<SpvCapability>;
+    using Extensions = std::vector<std::string>;
+    using Imports = std::vector<ImportDefinition>;
 
     SpvModule() = default;
     ~SpvModule() = default;
@@ -229,7 +281,8 @@ class SpvModule {
     SpvModule(SpvModule &&) = default;
     SpvModule &operator=(SpvModule &&) = default;
 
-    void add_debug(const SpvInstruction &val);
+    void add_debug_string(SpvId result_id, const std::string &string);
+    void add_debug_symbol(SpvId id, const std::string &symbol);
     void add_annotation(const SpvInstruction &val);
     void add_type(const SpvInstruction &val);
     void add_constant(const SpvInstruction &val);
@@ -239,20 +292,43 @@ class SpvModule {
     void add_instruction(const SpvInstruction &val);
 
     void add_entry_point(const std::string &name, SpvInstruction entry_point);
+    void import_instruction_set(SpvId id, const std::string &instruction_set);
     void require_capability(SpvCapability val);
     void require_extension(const std::string &val);
 
+    void set_version_format(uint32_t version);
     void set_source_language(SpvSourceLanguage val);
     void set_addressing_model(SpvAddressingModel val);
     void set_memory_model(SpvMemoryModel val);
+    void set_binding_count(SpvId count);
+
+    uint32_t version_format() const;
     SpvSourceLanguage source_language() const;
     SpvAddressingModel addressing_model() const;
     SpvMemoryModel memory_model() const;
     SpvInstruction entry_point(const std::string &name) const;
     EntryPointNames entry_point_names() const;
+    ImportNames import_names() const;
+    SpvId lookup_import(const std::string &instruction_set) const;
+    uint32_t entry_point_count() const;
+
+    Imports imports() const;
+    Extensions extensions() const;
+    Capabilities capabilities() const;
+    Instructions entry_points() const;
     const Instructions &execution_modes() const;
+    const Instructions &debug_source() const;
+    const Instructions &debug_symbols() const;
+    const Instructions &annotations() const;
+    const Instructions &type_definitions() const;
+    const Instructions &global_constants() const;
+    const Instructions &global_variables() const;
+    const Functions &function_definitions() const;
+
+    uint32_t binding_count() const;
     SpvModule module() const;
 
+    bool is_imported(const std::string &instruction_set) const;
     bool is_capability_required(SpvCapability val) const;
     bool is_extension_required(const std::string &val) const;
     bool is_defined() const;
@@ -276,6 +352,7 @@ class SpvModule {
 class SpvBuilder {
 public:
     using ParamTypes = std::vector<SpvId>;
+    using Components = std::vector<SpvId>;
     using StructMemberTypes = std::vector<SpvId>;
     using Variables = std::vector<SpvId>;
     using Indices = std::vector<uint32_t>;
@@ -287,101 +364,215 @@ class SpvBuilder {
     SpvBuilder(const SpvBuilder &) = delete;
     SpvBuilder &operator=(const SpvBuilder &) = delete;
 
+    // Reserve a unique ID to use for identifying a specific kind of SPIR-V result
     SpvId reserve_id(SpvKind = SpvResultId);
-    SpvKind kind_of(SpvId id);
-    SpvId map_type(const Type &type, uint32_t array_size = 1);
-    SpvId map_pointer_type(const Type &type, SpvStorageClass storage_class);
-    SpvId map_pointer_type(SpvId type_id, SpvStorageClass storage_class);
-    SpvId map_constant(const Type &type, const void *data);
-    SpvId map_null_constant(const Type &type);
-    SpvId map_bool_constant(bool value);
-    SpvId map_function_type(SpvId return_type, const ParamTypes &param_types = {});
+
+    // Look up the specific kind of SPIR-V item from its unique ID
+    SpvKind kind_of(SpvId id) const;
+
+    // Get a human readable name for a specific kind of SPIR-V item
+    std::string kind_name(SpvKind kind) const;
+
+    // Look up the ID associated with the type for a given variable ID
+    SpvId type_of(SpvId variable_id) const;
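A small sketch of the id bookkeeping these utilities support — reserve_id hands out fresh ids tagged with a kind, and kind_of/type_of let later passes recover what an id refers to:

    SpvId result_id = builder.reserve_id(SpvResultId);
    internal_assert(builder.kind_of(result_id) == SpvResultId);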
+ + SpvId declare_void_type(); SpvId declare_type(const Type &type, uint32_t array_size = 1); - SpvId declare_struct(const StructMemberTypes &member_types); - SpvId declare_runtime_array(SpvId base_type_id); SpvId declare_pointer_type(const Type &type, SpvStorageClass storage_class); - SpvId declare_pointer_type(SpvId base_type_id, SpvStorageClass storage_class); - SpvId declare_constant(const Type &type, const void *data); + SpvId declare_pointer_type(SpvId type_id, SpvStorageClass storage_class); + SpvId declare_constant(const Type &type, const void *data, bool is_specialization = false); SpvId declare_null_constant(const Type &type); SpvId declare_bool_constant(bool value); SpvId declare_string_constant(const std::string &str); + SpvId declare_integer_constant(const Type &type, int64_t value); + SpvId declare_float_constant(const Type &type, double value); SpvId declare_scalar_constant(const Type &type, const void *data); SpvId declare_vector_constant(const Type &type, const void *data); - SpvId declare_access_chain(SpvId ptr_type_id, SpvId base_id, SpvId element_id, const Indices &indices); - SpvId declare_function_type(SpvId return_type_id, const ParamTypes &param_type_ids); - - SpvFunction add_function(SpvId return_type, const ParamTypes &param_types = {}); + SpvId declare_specialization_constant(const Type &type, const void *data); + SpvId declare_access_chain(SpvId ptr_type_id, SpvId base_id, const Indices &indices); + SpvId declare_pointer_access_chain(SpvId ptr_type_id, SpvId base_id, SpvId element_id, const Indices &indices); + SpvId declare_function_type(SpvId return_type, const ParamTypes &param_types = {}); + SpvId declare_function(const std::string &name, SpvId function_type); + SpvId declare_struct(const std::string &name, const StructMemberTypes &member_types); + SpvId declare_variable(const std::string &name, SpvId type_id, SpvStorageClass storage_class, SpvId initializer_id = SpvInvalidId); + SpvId declare_global_variable(const std::string &name, SpvId type_id, SpvStorageClass storage_class, SpvId initializer_id = SpvInvalidId); + SpvId declare_symbol(const std::string &symbol, SpvId id, SpvId scope_id);
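[Editor's note: a minimal sketch of the declare-versus-add split described above. It assumes Halide's Type helpers (e.g. UInt(32)) and a default-constructed builder; it is illustrative only, not code from this change.]

    // Sketch: repeated declarations are de-duplicated by the builder's caches.
    SpvBuilder builder;
    SpvId a = builder.declare_type(UInt(32));  // first call adds an integer type declaration
    SpvId b = builder.declare_type(UInt(32));  // second call hits the type cache
    // a == b: the existing id is returned, so no duplicate type is emitted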
+ + // Top-level creation methods for adding new items ... these have a limited + // number of checks, and the caller must ensure that duplicates aren't created + SpvId add_type(const Type &type, uint32_t array_size = 1); + SpvId add_struct(const std::string &name, const StructMemberTypes &member_types); + SpvId add_array_with_default_size(SpvId base_type_id, SpvId array_size_id); + SpvId add_runtime_array(SpvId base_type_id); + SpvId add_pointer_type(const Type &type, SpvStorageClass storage_class); + SpvId add_pointer_type(SpvId base_type_id, SpvStorageClass storage_class); + SpvId add_constant(const Type &type, const void *data, bool is_specialization = false); + SpvId add_function_type(SpvId return_type_id, const ParamTypes &param_type_ids); + SpvId add_function(const std::string &name, SpvId return_type, const ParamTypes &param_types = {}); SpvId add_instruction(SpvInstruction val); + void add_annotation(SpvId target_id, SpvDecoration decoration_type, const Literals &literals = {}); void add_struct_annotation(SpvId struct_type_id, uint32_t member_index, SpvDecoration decoration_type, const Literals &literals = {}); + void add_symbol(const std::string &symbol, SpvId id, SpvId scope_id); - SpvId add_variable(SpvId type_id, uint32_t storage_class, SpvId initializer_id = SpvInvalidId); - SpvId add_global_variable(SpvId type_id, uint32_t storage_class, SpvId initializer_id = SpvInvalidId); + void add_entry_point(SpvId func_id, SpvExecutionModel exec_model, + const Variables &variables = {}); - SpvId map_struct(const StructMemberTypes &member_types); + // Define the execution mode with a fixed local size for the workgroup (using literal values) + void add_execution_mode_local_size(SpvId entry_point_id, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z); - void add_entry_point(const std::string &name, - SpvId func_id, SpvExecutionModel exec_model, - const Variables &variables = {}); + // Same as above but uses ids for the local size (to allow specialization constants to be used) + void add_execution_mode_local_size_id(SpvId entry_point_id, SpvId local_size_x, SpvId local_size_y, SpvId local_size_z); - void add_execution_mode_local_size(SpvId entry_point_id, uint32_t wg_size_x, uint32_t wg_size_y, uint32_t wg_size_z); + // Assigns a specific SPIR-V version format for output (needed for compatibility) + void set_version_format(uint32_t version); + // Assigns a specific source language hint to the module void set_source_language(SpvSourceLanguage val); + + // Sets the addressing model to use for the module void set_addressing_model(SpvAddressingModel val); + + // Sets the memory model to use for the module void set_memory_model(SpvMemoryModel val); + // Returns the source language hint for the module SpvSourceLanguage source_language() const; + + // Returns the addressing model used for the module SpvAddressingModel addressing_model() const; + + // Returns the memory model used for the module SpvMemoryModel memory_model() const; + // Import the GLSL.std.450 external instruction set. Returns its corresponding ID. + SpvId import_glsl_intrinsics();
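[Editor's note: the GLSL import pairs with SpvFactory::extended (declared further below) to emit extended instructions. A hedged sketch, assuming the GLSLstd450Sqrt opcode constant from the standard GLSL.std.450 header and previously declared ids f32_type_id and value_id:]

    // Sketch: call sqrt from the GLSL.std.450 extended instruction set.
    SpvId glsl_id = builder.import_glsl_intrinsics();  // imports "GLSL.std.450" once, cached thereafter
    SpvId result_id = builder.reserve_id(SpvResultId);
    builder.append(SpvFactory::extended(glsl_id, GLSLstd450Sqrt,
                                        f32_type_id, result_id, {value_id}));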
+ + // Import an external instruction set by name. Returns its corresponding ID. + SpvId import_instruction_set(const std::string &instruction_set); + + // Add an extension string to the list of required extensions for the module + void require_extension(const std::string &extension); + + // Add a specific capability to the list of requirements for the module + void require_capability(SpvCapability); + + // Returns true if the given instruction set has been imported + bool is_imported(const std::string &instruction_set) const; + + // Returns true if the given extension string is required by the module + bool is_extension_required(const std::string &extension) const; + + // Returns true if the given capability is required by the module + bool is_capability_required(SpvCapability) const; + + // Change the current build location to the given block. All local + // declarations and instructions will be added here. void enter_block(const SpvBlock &block); + + // Create a new block with the given ID + SpvBlock create_block(SpvId block_id); + + // Returns the current block (the active scope for building) SpvBlock current_block() const; + + // Resets the block build scope, and unassigns the current block SpvBlock leave_block(); + // Change the current build scope to be within the given function void enter_function(const SpvFunction &func); + + // Returns the function object for the given ID (or an invalid function if none is found) SpvFunction lookup_function(SpvId func_id) const; + + // Returns the current function being used as the active build scope SpvFunction current_function() const; + + // Resets the function build scope, and unassigns the current function SpvFunction leave_function(); - void set_current_id(SpvId id); + // Returns the current id being used for building (i.e. the last item created) SpvId current_id() const; - SpvModule current_module() const; + // Updates the current id being used for building + void update_id(SpvId id); - void require_extension(const std::string &extension); - void require_capability(SpvCapability); + // Returns true if the given id is of the corresponding type + bool is_pointer_type(SpvId id) const; + bool is_struct_type(SpvId id) const; + bool is_vector_type(SpvId id) const; + bool is_scalar_type(SpvId id) const; + bool is_array_type(SpvId id) const; + bool is_constant(SpvId id) const; - bool is_extension_required(const std::string &extension) const; - bool is_capability_required(SpvCapability) const; + // Looks up the given pointer type id and returns a corresponding base type id (or an invalid id if none is found) + SpvId lookup_base_type(SpvId pointer_type) const; + + // Returns the storage class for the given variable id (or invalid if none is found) + SpvStorageClass lookup_storage_class(SpvId id) const; + + // Returns the item id for the given symbol name (or an invalid id if none is found) + SpvId lookup_id(const std::string &symbol) const; + + // Returns the build scope id for the item id (or an invalid id if none is found) + SpvId lookup_scope(SpvId id) const; + + // Returns the id for the imported instruction set (or an invalid id if none is found) + SpvId lookup_import(const std::string &instruction_set) const; + + // Returns the symbol string for the given id (or an empty string if none is found) + std::string lookup_symbol(SpvId id) const; + // Returns the current module being used for building + SpvModule current_module() const; + + // Appends the given instruction to the current build location void append(SpvInstruction inst);
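[Editor's note: putting the scope methods together, code generation follows an enter/append/leave pattern. A rough sketch under the same assumptions as above; error handling omitted:]

    // Sketch: emit a trivial function body using the builder's scope API.
    SpvId void_type_id = builder.declare_void_type();
    SpvId func_id = builder.add_function("my_func", void_type_id);
    SpvFunction func = builder.lookup_function(func_id);
    builder.enter_function(func);               // the function becomes the build scope
    builder.enter_block(func.entry_block());    // appended instructions land in this block
    builder.append(SpvFactory::return_stmt());  // OpReturn
    builder.leave_block();
    builder.leave_function();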
+ + // Finalizes the module and prepares it for encoding (must be called before the module can be used) + void finalize(); + + // Encodes the current module to the given binary void encode(SpvBinary &binary) const; + // Resets the builder and all internal state + void reset(); + protected: - using TypeKey = std::string; + using TypeKey = uint64_t; using TypeMap = std::unordered_map<TypeKey, SpvId>; using KindMap = std::unordered_map<SpvId, SpvKind>; using PointerTypeKey = std::pair<SpvId, SpvStorageClass>; using PointerTypeMap = std::map<PointerTypeKey, SpvId>; - using ConstantKey = std::string; + using BaseTypeMap = std::unordered_map<SpvId, SpvId>; + using VariableTypeMap = std::unordered_map<SpvId, SpvId>; + using StorageClassMap = std::unordered_map<SpvId, SpvStorageClass>; + using ConstantKey = uint64_t; using ConstantMap = std::unordered_map<ConstantKey, SpvId>; using StringMap = std::unordered_map<ConstantKey, SpvId>; - using InstructionMap = std::unordered_map<SpvId, SpvInstruction>; - using FunctionTypeKey = std::string; + using ScopeMap = std::unordered_map<SpvId, SpvId>; + using IdSymbolMap = std::unordered_map<SpvId, std::string>; + using SymbolIdMap = std::unordered_map<std::string, SpvId>; + using FunctionTypeKey = uint64_t; using FunctionTypeMap = std::unordered_map<FunctionTypeKey, SpvId>; using FunctionMap = std::unordered_map<SpvId, SpvFunction>; - using FunctionStack = std::stack<SpvFunction>; - using BlockStack = std::stack<SpvBlock>; - SpvId declare_id(SpvKind kind); + // Internal methods for creating ids, keys, and lookups + + SpvId make_id(SpvKind kind); TypeKey make_type_key(const Type &type, uint32_t array_size = 1) const; SpvId lookup_type(const Type &type, uint32_t array_size = 1) const; TypeKey make_struct_type_key(const StructMemberTypes &member_types) const; - SpvId lookup_struct(const StructMemberTypes &member_types) const; + SpvId lookup_struct(const std::string &name, const StructMemberTypes &member_types) const; PointerTypeKey make_pointer_type_key(const Type &type, SpvStorageClass storage_class) const; SpvId lookup_pointer_type(const Type &type, SpvStorageClass storage_class) const; @@ -389,34 +580,48 @@ class SpvBuilder { PointerTypeKey make_pointer_type_key(SpvId base_type_id, SpvStorageClass storage_class) const; SpvId lookup_pointer_type(SpvId base_type_id, SpvStorageClass storage_class) const; - ConstantKey make_bool_constant_key(bool value) const; + template<typename T> + SpvId declare_scalar_constant_of_type(const Type &scalar_type, const T *data); + + template<typename T> + SpvId declare_specialization_constant_of_type(const Type &scalar_type, const T *data); - ConstantKey make_constant_key(const Type &type, const void *data) const; - SpvId lookup_constant(const Type &type, const void *data) const; + template<typename T> + SpvBuilder::Components declare_constants_for_each_lane(Type type, const void *data); + + ConstantKey make_bool_constant_key(bool value) const; + ConstantKey make_string_constant_key(const std::string &value) const; + ConstantKey make_constant_key(uint8_t code, uint8_t bits, int lanes, size_t bytes, const void *data, bool is_specialization = false) const; + ConstantKey make_constant_key(const Type &type, const void *data, bool is_specialization = false) const; + SpvId lookup_constant(const Type &type, const void *data, bool is_specialization = false) const; ConstantKey make_null_constant_key(const Type &type) const; SpvId lookup_null_constant(const Type &type) const; - SpvId map_instruction(const SpvInstruction &inst); - SpvInstruction lookup_instruction(SpvId result_id) const; - bool has_instruction(SpvId inst) const; + SpvId lookup_variable(const std::string &name, SpvId type_id, SpvStorageClass storage_class, SpvId scope_id) const; + bool has_variable(const std::string &name, SpvId type_id, SpvStorageClass storage_class, SpvId scope_id) const; FunctionTypeKey make_function_type_key(SpvId return_type_id, const ParamTypes &param_type_ids) const;
SpvId lookup_function_type(SpvId return_type_id, const ParamTypes &param_type_ids) const; - SpvId scope_id = SpvInvalidId; + SpvId active_id = SpvInvalidId; + SpvFunction active_function; + SpvBlock active_block; SpvModule module; KindMap kind_map; TypeMap type_map; TypeMap struct_map; + ScopeMap scope_map; StringMap string_map; ConstantMap constant_map; FunctionMap function_map; - InstructionMap instruction_map; + IdSymbolMap id_symbol_map; + SymbolIdMap symbol_id_map; + BaseTypeMap base_type_map; + StorageClassMap storage_class_map; PointerTypeMap pointer_type_map; + VariableTypeMap variable_type_map; FunctionTypeMap function_type_map; - FunctionStack function_stack; - BlockStack block_stack; }; /** Factory interface for constructing specific SPIR-V instructions */ @@ -427,14 +632,19 @@ struct SpvFactory { using Components = std::vector<SpvId>; using ParamTypes = std::vector<SpvId>; using MemberTypeIds = std::vector<SpvId>; + using Operands = std::vector<SpvId>; using Variables = std::vector<SpvId>; using VariableBlockIdPair = std::pair<SpvId, SpvId>; // (Variable Id, Block Id) using BlockVariables = std::vector<VariableBlockIdPair>; + static SpvInstruction no_op(SpvId result_id); static SpvInstruction capability(const SpvCapability &capability); static SpvInstruction extension(const std::string &extension); - static SpvInstruction import(const std::string &import); + static SpvInstruction import(SpvId instruction_set_id, const std::string &instruction_set_name); static SpvInstruction label(SpvId result_id); + static SpvInstruction debug_line(SpvId string_id, uint32_t line, uint32_t column); + static SpvInstruction debug_string(SpvId result_id, const std::string &string); + static SpvInstruction debug_symbol(SpvId target_id, const std::string &symbol); static SpvInstruction decorate(SpvId target_id, SpvDecoration decoration_type, const Literals &literals = {}); static SpvInstruction decorate_member(SpvId struct_type_id, uint32_t member_index, SpvDecoration decoration_type, const Literals &literals = {}); static SpvInstruction void_type(SpvId void_type_id); @@ -442,15 +652,17 @@ struct SpvFactory { static SpvInstruction integer_type(SpvId int_type_id, uint32_t bits, uint32_t signedness); static SpvInstruction float_type(SpvId float_type_id, uint32_t bits); static SpvInstruction vector_type(SpvId vector_type_id, SpvId element_type_id, uint32_t vector_size); - static SpvInstruction array_type(SpvId array_type_id, SpvId element_type_id, uint32_t array_size); + static SpvInstruction array_type(SpvId array_type_id, SpvId element_type_id, SpvId array_size_id); static SpvInstruction struct_type(SpvId result_id, const MemberTypeIds &member_type_ids); static SpvInstruction runtime_array_type(SpvId result_type_id, SpvId base_type_id); static SpvInstruction pointer_type(SpvId pointer_type_id, SpvStorageClass storage_class, SpvId base_type_id); static SpvInstruction function_type(SpvId function_type_id, SpvId return_type_id, const ParamTypes &param_type_ids); - static SpvInstruction constant(SpvId result_id, SpvId type_id, size_t bytes, const void *data); + static SpvInstruction constant(SpvId result_id, SpvId type_id, size_t bytes, const void *data, SpvValueType value_type); static SpvInstruction null_constant(SpvId result_id, SpvId type_id); static SpvInstruction bool_constant(SpvId result_id, SpvId type_id, bool value); + static SpvInstruction string_constant(SpvId result_id, const std::string &value); static SpvInstruction composite_constant(SpvId result_id, SpvId type_id, const Components &components);
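[Editor's note: each factory method only assembles an SpvInstruction value; nothing is recorded until the instruction is handed to a builder or module. A small sketch with assumed ids i32_type_id, lhs_id, and rhs_id:]

    // Sketch: factory methods build instructions; the builder decides placement.
    SpvId sum_id = builder.reserve_id(SpvResultId);
    SpvInstruction add = SpvFactory::integer_add(i32_type_id, sum_id, lhs_id, rhs_id);
    builder.append(add);  // appended to the builder's current block, not emitted globally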
+ static SpvInstruction specialization_constant(SpvId result_id, SpvId type_id, size_t bytes, const void *data, SpvValueType value_type); static SpvInstruction variable(SpvId result_id, SpvId result_type_id, uint32_t storage_class, SpvId initializer_id = SpvInvalidId); static SpvInstruction function(SpvId return_type_id, SpvId func_id, uint32_t control_mask, SpvId func_type_id); static SpvInstruction function_parameter(SpvId param_type_id, SpvId param_id); @@ -458,18 +670,39 @@ struct SpvFactory { static SpvInstruction return_stmt(SpvId return_value_id = SpvInvalidId); static SpvInstruction entry_point(SpvId exec_model, SpvId func_id, const std::string &name, const Variables &variables); static SpvInstruction memory_model(SpvAddressingModel addressing_model, SpvMemoryModel memory_model); - static SpvInstruction exec_mode_local_size(SpvId function_id, uint32_t wg_size_x, uint32_t wg_size_y, uint32_t wg_size_z); - static SpvInstruction control_barrier(SpvId execution_scope_id, SpvId memory_scope_id, uint32_t semantics_mask); + static SpvInstruction exec_mode_local_size(SpvId function_id, uint32_t local_size_x, uint32_t local_size_y, uint32_t local_size_z); + static SpvInstruction exec_mode_local_size_id(SpvId function_id, SpvId local_size_x_id, SpvId local_size_y_id, SpvId local_size_z_id); // only available in SPIR-V 1.2 or later + static SpvInstruction memory_barrier(SpvId memory_scope_id, SpvId semantics_mask_id); + static SpvInstruction control_barrier(SpvId execution_scope_id, SpvId memory_scope_id, SpvId semantics_mask_id); + static SpvInstruction bitwise_not(SpvId type_id, SpvId result_id, SpvId src_id); + static SpvInstruction bitwise_and(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id); static SpvInstruction logical_not(SpvId type_id, SpvId result_id, SpvId src_id); + static SpvInstruction logical_and(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id); + static SpvInstruction shift_right_logical(SpvId type_id, SpvId result_id, SpvId src_id, SpvId shift_id); + static SpvInstruction shift_right_arithmetic(SpvId type_id, SpvId result_id, SpvId src_id, SpvId shift_id); static SpvInstruction multiply_extended(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed); static SpvInstruction select(SpvId type_id, SpvId result_id, SpvId condition_id, SpvId true_id, SpvId false_id); - static SpvInstruction in_bounds_access_chain(SpvId type_id, SpvId result_id, SpvId base_id, SpvId element_id, const Indices &indices); + static SpvInstruction in_bounds_access_chain(SpvId type_id, SpvId result_id, SpvId base_id, const Indices &indices); + static SpvInstruction pointer_access_chain(SpvId type_id, SpvId result_id, SpvId base_id, SpvId element_id, const Indices &indices); static SpvInstruction load(SpvId type_id, SpvId result_id, SpvId ptr_id, uint32_t access_mask = 0x0); static SpvInstruction store(SpvId ptr_id, SpvId obj_id, uint32_t access_mask = 0x0); - static SpvInstruction vector_insert_dynamic(SpvId result_id, SpvId vector_id, SpvId value_id, uint32_t index); + static SpvInstruction vector_insert_dynamic(SpvId type_id, SpvId result_id, SpvId vector_id, SpvId value_id, SpvId index_id); + static SpvInstruction vector_extract_dynamic(SpvId type_id, SpvId result_id, SpvId vector_id, SpvId value_id, SpvId index_id); + static SpvInstruction vector_shuffle(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, const Indices &indices); + static SpvInstruction composite_insert(SpvId type_id, SpvId result_id, SpvId object_id, SpvId composite_id, const SpvFactory::Indices &indices);
static SpvInstruction composite_extract(SpvId type_id, SpvId result_id, SpvId composite_id, const Indices &indices); + static SpvInstruction composite_construct(SpvId type_id, SpvId result_id, const Components &constituents); + static SpvInstruction is_inf(SpvId type_id, SpvId result_id, SpvId src_id); + static SpvInstruction is_nan(SpvId type_id, SpvId result_id, SpvId src_id); static SpvInstruction bitcast(SpvId type_id, SpvId result_id, SpvId src_id); + static SpvInstruction float_add(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id); static SpvInstruction integer_add(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id); + static SpvInstruction integer_equal(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id); + static SpvInstruction integer_not_equal(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id); + static SpvInstruction integer_less_than(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed); + static SpvInstruction integer_less_than_equal(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed); + static SpvInstruction integer_greater_than(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed); + static SpvInstruction integer_greater_than_equal(SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id, bool is_signed); static SpvInstruction branch(SpvId target_label_id); static SpvInstruction conditional_branch(SpvId condition_label_id, SpvId true_label_id, SpvId false_label_id, const BranchWeights &weights = {}); static SpvInstruction loop_merge(SpvId merge_label_id, SpvId continue_label_id, uint32_t loop_control_mask = SpvLoopControlMaskNone); @@ -478,18 +711,19 @@ struct SpvFactory { static SpvInstruction unary_op(SpvOp op_code, SpvId type_id, SpvId result_id, SpvId src_id); static SpvInstruction binary_op(SpvOp op_code, SpvId type_id, SpvId result_id, SpvId src_a_id, SpvId src_b_id); static SpvInstruction convert(SpvOp op_code, SpvId type_id, SpvId result_id, SpvId src_id); + static SpvInstruction extended(SpvId instruction_set_id, SpvId instruction_number, SpvId type_id, SpvId result_id, const SpvFactory::Operands &operands); }; /** Contents of a SPIR-V Instruction */ struct SpvInstructionContents { using Operands = std::vector<SpvId>; - using Immediates = std::vector<bool>; + using ValueTypes = std::vector<SpvValueType>; mutable RefCount ref_count; SpvOp op_code = SpvOpNop; SpvId result_id = SpvNoResult; SpvId type_id = SpvNoType; Operands operands; - Immediates immediates; + ValueTypes value_types; SpvBlock block; }; @@ -529,13 +763,15 @@ struct SpvFunctionContents { struct SpvModuleContents { using Capabilities = std::set<SpvCapability>; using Extensions = std::set<std::string>; - using Imports = std::set<std::string>; + using Imports = std::unordered_map<std::string, SpvId>; using Functions = std::vector<SpvFunction>; using Instructions = std::vector<SpvInstruction>; using EntryPoints = std::unordered_map<std::string, SpvInstruction>; mutable RefCount ref_count; SpvId module_id = SpvInvalidId; + SpvId version_format = SpvVersion; + SpvId binding_count = 0; SpvSourceLanguage source_language = SpvSourceLanguageUnknown; SpvAddressingModel addressing_model = SpvAddressingModelLogical; SpvMemoryModel memory_model = SpvMemoryModelSimple; @@ -544,7 +780,8 @@ struct SpvModuleContents { Imports imports; EntryPoints entry_points; Instructions execution_modes; - Instructions debug; + Instructions debug_source; + Instructions debug_symbols; Instructions annotations; Instructions types; Instructions constants; @@ -553,6 +790,23 @@ struct SpvModuleContents { Instructions instructions; };
+/** Helper functions for determining the calling convention of GLSL builtins **/ +bool is_glsl_unary_op(SpvId glsl_op_code); +bool is_glsl_binary_op(SpvId glsl_op_code); +uint32_t glsl_operand_count(SpvId glsl_op_code); + +/** Output the contents of a SPIR-V module in human-readable form **/ +std::ostream &operator<<(std::ostream &stream, const SpvModule &); + +/** Output the definition of a SPIR-V function in human-readable form **/ +std::ostream &operator<<(std::ostream &stream, const SpvFunction &); + +/** Output the contents of a SPIR-V block in human-readable form **/ +std::ostream &operator<<(std::ostream &stream, const SpvBlock &); + +/** Output a SPIR-V instruction in human-readable form **/ +std::ostream &operator<<(std::ostream &stream, const SpvInstruction &); + } // namespace Internal } // namespace Halide diff --git a/src/Target.cpp b/src/Target.cpp index c33301c67400..4d02eb86d3a6 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -369,6 +369,29 @@ Target::Feature get_host_cuda_capability(Target t) { return cap; } +Target::Feature calculate_host_vulkan_capability(Target t) { + const auto *interface = get_device_interface_for_device_api(DeviceAPI::Vulkan, t); + internal_assert(interface->compute_capability); + int major, minor; + int err = interface->compute_capability(nullptr, &major, &minor); + internal_assert(err == 0) << "Failed to query vulkan compute capability\n"; + int ver = major * 10 + minor; + if (ver < 10) { + return Target::FeatureEnd; + } else if (ver < 12) { + return Target::VulkanV10; + } else if (ver < 13) { + return Target::VulkanV12; + } else { + return Target::VulkanV13; + } +} + +Target::Feature get_host_vulkan_capability(Target t) { + static Target::Feature cap = calculate_host_vulkan_capability(t); + return cap; +} + const std::map<std::string, Target::OS> os_name_map = { {"os_unknown", Target::OSUnknown}, {"linux", Target::Linux}, @@ -519,6 +542,15 @@ const std::map<std::string, Target::Feature> feature_name_map = { {"sanitizer_coverage", Target::SanitizerCoverage}, {"profile_by_timer", Target::ProfileByTimer}, {"spirv", Target::SPIRV}, + {"vulkan", Target::Vulkan}, + {"vk_int8", Target::VulkanInt8}, + {"vk_int16", Target::VulkanInt16}, + {"vk_int64", Target::VulkanInt64}, + {"vk_float16", Target::VulkanFloat16}, + {"vk_float64", Target::VulkanFloat64}, + {"vk_v10", Target::VulkanV10}, + {"vk_v12", Target::VulkanV12}, + {"vk_v13", Target::VulkanV13}, {"semihosting", Target::Semihosting}, // NOTE: When adding features to this map, be sure to update PyEnums.cpp as well. };
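[Editor's note: for orientation, the new feature flags compose into target strings the same way the existing GPU features do. An illustrative sketch, assuming Halide's Target string constructor and the supports_type overload added below:]

    // Sketch: Vulkan-enabled targets built from feature strings.
    Target t1("x86-64-linux-vulkan");                 // version features detected from the host
    Target t2("x86-64-linux-vulkan-vk_v13-vk_int8");  // Vulkan 1.3 plus 8-bit integer support
    bool ok = t2.supports_type(UInt(8), DeviceAPI::Vulkan);  // true, since vk_int8 is set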
@@ -669,6 +701,15 @@ bool merge_string(Target &t, const std::string &target) { t.set_feature(get_host_cuda_capability(t)); } + if (is_host && + t.has_feature(Target::Vulkan) && + !t.has_feature(Target::VulkanV10) && + !t.has_feature(Target::VulkanV12) && + !t.has_feature(Target::VulkanV13)) { + // Detect host vulkan capability + t.set_feature(get_host_vulkan_capability(t)); + } + if (arch_specified && !bits_specified) { return false; } @@ -860,6 +901,9 @@ bool Target::supported() const { #if !defined(WITH_D3D12) bad |= has_feature(Target::D3D12Compute); #endif +#if !defined(WITH_VULKAN) + bad |= has_feature(Target::Vulkan); +#endif #if !defined(WITH_WEBGPU) bad |= has_feature(Target::WebGPU); #endif @@ -928,6 +972,7 @@ bool Target::has_gpu_feature() const { has_feature(Metal) || has_feature(D3D12Compute) || has_feature(OpenGLCompute) || + has_feature(Vulkan) || has_feature(WebGPU)); } @@ -965,18 +1010,36 @@ int Target::get_cuda_capability_lower_bound() const { return 20; } +int Target::get_vulkan_capability_lower_bound() const { + if (!has_feature(Target::Vulkan)) { + return -1; + } + if (has_feature(Target::VulkanV10)) { + return 10; + } + if (has_feature(Target::VulkanV12)) { + return 12; + } + if (has_feature(Target::VulkanV13)) { + return 13; + } + return 10; +} + bool Target::supports_type(const Type &t) const { if (t.bits() == 64) { if (t.is_float()) { - return !has_feature(Metal) && - !has_feature(OpenGLCompute) && - !has_feature(D3D12Compute) && - !has_feature(WebGPU) && - (!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)); + return (!has_feature(Metal) && + !has_feature(OpenGLCompute) && + !has_feature(D3D12Compute) && + (!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) && + (!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) && + !has_feature(WebGPU)); } else { return (!has_feature(Metal) && !has_feature(OpenGLCompute) && !has_feature(D3D12Compute) && + (!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) && !has_feature(WebGPU)); } } @@ -1008,6 +1071,18 @@ bool Target::supports_type(const Type &t, DeviceAPI device) const { return t.bits() < 64; } else if (device == DeviceAPI::OpenGLCompute) { return t.bits() < 64; + } else if (device == DeviceAPI::Vulkan) { + if (t.is_float() && t.bits() == 64) { + return has_feature(Target::VulkanFloat64); + } else if (t.is_float() && t.bits() == 16) { + return has_feature(Target::VulkanFloat16); + } else if (t.is_int_or_uint() && t.bits() == 64) { + return has_feature(Target::VulkanInt64); + } else if (t.is_int_or_uint() && t.bits() == 16) { + return has_feature(Target::VulkanInt16); + } else if (t.is_int_or_uint() && t.bits() == 8) { + return has_feature(Target::VulkanInt8); + } } else if (device == DeviceAPI::WebGPU) { return t.bits() < 64; } @@ -1054,6 +1129,9 @@ DeviceAPI Target::get_required_device_api() const { if (has_feature(Target::OpenGLCompute)) { return DeviceAPI::OpenGLCompute; } + if (has_feature(Target::Vulkan)) { + return DeviceAPI::Vulkan; + } if (has_feature(Target::WebGPU)) { return DeviceAPI::WebGPU; } @@ -1074,6 +1152,8 @@ Target::Feature target_feature_for_device_api(DeviceAPI api) { return Target::HVX; case DeviceAPI::D3D12Compute: return Target::D3D12Compute; + case DeviceAPI::Vulkan: + return Target::Vulkan; case DeviceAPI::WebGPU: return Target::WebGPU; default: @@ -1158,7 +1238,7 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) // (c) must match across both targets; it is an error if one target has the feature and the other doesn't //
clang-format off - const std::array union_features = {{ + const std::array union_features = {{ // These are true union features. CUDA, D3D12Compute, @@ -1166,6 +1246,7 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) NoNEON, OpenCL, OpenGLCompute, + Vulkan, WebGPU, // These features are actually intersection-y, but because targets only record the _highest_, @@ -1182,6 +1263,9 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) HVX_v62, HVX_v65, HVX_v66, + VulkanV10, + VulkanV12, + VulkanV13, }}; // clang-format on @@ -1291,6 +1375,22 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result) output.features.reset(CUDACapability86); } + // Pick tight lower bound for Vulkan capability. Use fall-through to clear redundant features + int vulkan_a = get_vulkan_capability_lower_bound(); + int vulkan_b = other.get_vulkan_capability_lower_bound(); + + // Same trick as above for CUDA + int vulkan_capability = std::min((unsigned)vulkan_a, (unsigned)vulkan_b); + if (vulkan_capability < 10) { + output.features.reset(VulkanV10); + } + if (vulkan_capability < 12) { + output.features.reset(VulkanV12); + } + if (vulkan_capability < 13) { + output.features.reset(VulkanV13); + } + // Pick tight lower bound for HVX version. Use fall-through to clear redundant features int hvx_a = get_hvx_lower_bound(*this); int hvx_b = get_hvx_lower_bound(other); @@ -1331,6 +1431,9 @@ void target_test() { {{"x86-64-linux-cuda", "x86-64-linux", "x86-64-linux-cuda"}}, {{"x86-64-linux-cuda-cuda_capability_50", "x86-64-linux-cuda", "x86-64-linux-cuda"}}, {{"x86-64-linux-cuda-cuda_capability_50", "x86-64-linux-cuda-cuda_capability_30", "x86-64-linux-cuda-cuda_capability_30"}}, + {{"x86-64-linux-vulkan", "x86-64-linux", "x86-64-linux-vulkan"}}, + {{"x86-64-linux-vulkan-vk_v13", "x86-64-linux-vulkan", "x86-64-linux-vulkan"}}, + {{"x86-64-linux-vulkan-vk_v13", "x86-64-linux-vulkan-vk_v10", "x86-64-linux-vulkan-vk_v10"}}, {{"hexagon-32-qurt-hvx_v65", "hexagon-32-qurt-hvx_v62", "hexagon-32-qurt-hvx_v62"}}, {{"hexagon-32-qurt-hvx_v62", "hexagon-32-qurt", "hexagon-32-qurt"}}, {{"hexagon-32-qurt-hvx_v62-hvx", "hexagon-32-qurt", ""}}, diff --git a/src/Target.h b/src/Target.h index 2efd2cce2782..b27f4b73a99d 100644 --- a/src/Target.h +++ b/src/Target.h @@ -157,6 +157,15 @@ struct Target { SanitizerCoverage = halide_target_feature_sanitizer_coverage, ProfileByTimer = halide_target_feature_profile_by_timer, SPIRV = halide_target_feature_spirv, + Vulkan = halide_target_feature_vulkan, + VulkanInt8 = halide_target_feature_vulkan_int8, + VulkanInt16 = halide_target_feature_vulkan_int16, + VulkanInt64 = halide_target_feature_vulkan_int64, + VulkanFloat16 = halide_target_feature_vulkan_float16, + VulkanFloat64 = halide_target_feature_vulkan_float64, + VulkanV10 = halide_target_feature_vulkan_version10, + VulkanV12 = halide_target_feature_vulkan_version12, + VulkanV13 = halide_target_feature_vulkan_version13, Semihosting = halide_target_feature_semihosting, FeatureEnd = halide_target_feature_end }; @@ -320,6 +329,11 @@ struct Target { * features are set. */ int get_cuda_capability_lower_bound() const; + /** Get the minimum Vulkan capability found as an integer. Returns + * 10 (our minimum supported Vulkan compute capability) if no Vulkan + * features are set. */ + int get_vulkan_capability_lower_bound() const; + /** Was libHalide compiled with support for this target? 
*/ bool supported() const; diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index cd0bd9d8cdda..71af475c2eb4 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -78,6 +78,7 @@ set(RUNTIME_CPP to_string trace_helper tracing + vulkan wasm_cpu_features # TODO(https://github.com/halide/Halide/issues/7248) # webgpu @@ -93,6 +94,7 @@ set(RUNTIME_CPP windows_profiler windows_threads windows_threads_tsan + windows_vulkan windows_yield write_debug_image x86_cpu_features @@ -135,6 +137,7 @@ set(RUNTIME_HEADER_FILES HalideRuntimeOpenCL.h HalideRuntimeOpenGLCompute.h HalideRuntimeQurt.h + HalideRuntimeVulkan.h HalideRuntimeWebGPU.h ) @@ -229,7 +232,12 @@ foreach (i IN LISTS RUNTIME_CPP) set(SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/${i}.cpp") - set(RUNTIME_DEFINES -DCOMPILING_HALIDE_RUNTIME -DBITS_${j}) + set(RUNTIME_DEFINES -DCOMPILING_HALIDE_RUNTIME -DBITS_${j} + -DHALIDE_VERSION=${Halide_VERSION} + -DHALIDE_VERSION_MAJOR=${Halide_VERSION_MAJOR} + -DHALIDE_VERSION_MINOR=${Halide_VERSION_MINOR} + -DHALIDE_VERSION_PATCH=${Halide_VERSION_PATCH}) + set(RUNTIME_DEFINES_debug -g -DDEBUG_RUNTIME ${RUNTIME_DEFINES}) foreach (SUFFIX IN ITEMS "" "_debug") diff --git a/src/runtime/HalideRuntime.h b/src/runtime/HalideRuntime.h index 6917abf4ee4b..d0c8e4e9fc4e 100644 --- a/src/runtime/HalideRuntime.h +++ b/src/runtime/HalideRuntime.h @@ -1393,6 +1393,15 @@ typedef enum halide_target_feature_t { halide_target_feature_sanitizer_coverage, ///< Enable hooks for SanitizerCoverage support. halide_target_feature_profile_by_timer, ///< Alternative to halide_target_feature_profile using timer interrupt for systems without threads or applicartions that need to avoid them. halide_target_feature_spirv, ///< Enable SPIR-V code generation support. + halide_target_feature_vulkan, ///< Enable Vulkan runtime support. + halide_target_feature_vulkan_int8, ///< Enable Vulkan 8-bit integer support. + halide_target_feature_vulkan_int16, ///< Enable Vulkan 16-bit integer support. + halide_target_feature_vulkan_int64, ///< Enable Vulkan 64-bit integer support. + halide_target_feature_vulkan_float16, ///< Enable Vulkan 16-bit float support. + halide_target_feature_vulkan_float64, ///< Enable Vulkan 64-bit float support. + halide_target_feature_vulkan_version10, ///< Enable Vulkan v1.0 runtime target support. + halide_target_feature_vulkan_version12, ///< Enable Vulkan v1.2 runtime target support. + halide_target_feature_vulkan_version13, ///< Enable Vulkan v1.3 runtime target support. halide_target_feature_semihosting, ///< Used together with Target::NoOS for the baremetal target built with semihosting library and run with semihosting mode where minimum I/O communication with a host PC is available. halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing. } halide_target_feature_t; diff --git a/src/runtime/HalideRuntimeVulkan.h b/src/runtime/HalideRuntimeVulkan.h new file mode 100644 index 000000000000..4e52da4fb034 --- /dev/null +++ b/src/runtime/HalideRuntimeVulkan.h @@ -0,0 +1,108 @@ +#ifndef HALIDE_HALIDERUNTIMEVULKAN_H +#define HALIDE_HALIDERUNTIMEVULKAN_H + +// Don't include HalideRuntime.h if the contents of it were already pasted into a generated header above this one +#ifndef HALIDE_HALIDERUNTIME_H + +#include "HalideRuntime.h" + +#endif +/** \file + * Routines specific to the Halide Vulkan runtime. 
+ */ + +#ifdef __cplusplus extern "C" { #endif + +#define HALIDE_RUNTIME_VULKAN + +extern const struct halide_device_interface_t *halide_vulkan_device_interface(); + +/** These are forward declared here to allow clients to override the + * Halide Vulkan runtime. Do not call them. */ +// @{ +extern int halide_vulkan_initialize_kernels(void *user_context, void **state_ptr, + const char *src, int size); + +extern int halide_vulkan_run(void *user_context, + void *state_ptr, + const char *entry_name, + int blocksX, int blocksY, int blocksZ, + int threadsX, int threadsY, int threadsZ, + int shared_mem_bytes, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[]); + +extern void halide_vulkan_finalize_kernels(void *user_context, void *state_ptr); + +// @} + +// The default implementation of halide_vulkan_acquire_context uses +// the global pointers above, and serializes access with a spin lock. +// Overriding implementations of acquire/release must implement the +// following behavior: + +// - halide_vulkan_acquire_context should always store a valid +// instance/device/queue in the corresponding out parameters, +// or return an error code. +// - A call to halide_vulkan_acquire_context is followed by a matching +// call to halide_vulkan_release_context. halide_vulkan_acquire_context +// should block while a previous call (if any) has not yet been +// released via halide_vulkan_release_context. +// - Parameters: +// allocator: an internal halide type handle used for allocating resources +// instance: the vulkan instance handle +// device: the vulkan device handle +// physical_device: the vulkan physical device handle +// command_pool: the vulkan command pool handle (strangely doesn't have a VkCommandPool_T typedef) +// queue: the vulkan queue handle +// queue_family_index: the index corresponding to the device queue properties for the device (as described by vkGetPhysicalDeviceQueueFamilyProperties) +// create: if set to true, attempt to create a new vulkan context, otherwise acquire the current one +struct halide_vulkan_memory_allocator; +extern int halide_vulkan_acquire_context(void *user_context, + struct halide_vulkan_memory_allocator **allocator, + struct VkInstance_T **instance, + struct VkDevice_T **device, + struct VkPhysicalDevice_T **physical_device, + uint64_t *command_pool, + struct VkQueue_T **queue, + uint32_t *queue_family_index, + bool create = true); + +extern int halide_vulkan_release_context(void *user_context, + struct VkInstance_T *instance, + struct VkDevice_T *device, + struct VkQueue_T *queue);
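[Editor's note: a hedged sketch of an application-side override that hands Halide an externally owned Vulkan context. The my_* handles are hypothetical globals assumed to have been created by the application's renderer; this is not the runtime's actual default implementation:]

    // Sketch: route Halide's Vulkan runtime onto an existing context.
    extern "C" int halide_vulkan_acquire_context(void *user_context,
                                                 struct halide_vulkan_memory_allocator **allocator,
                                                 struct VkInstance_T **instance,
                                                 struct VkDevice_T **device,
                                                 struct VkPhysicalDevice_T **physical_device,
                                                 uint64_t *command_pool,
                                                 struct VkQueue_T **queue,
                                                 uint32_t *queue_family_index,
                                                 bool create) {
        // A real override must also serialize against concurrent acquires (see above).
        *allocator = my_allocator;  // app-owned handles (hypothetical globals)
        *instance = my_instance;
        *device = my_device;
        *physical_device = my_physical_device;
        *command_pool = my_command_pool;
        *queue = my_queue;
        *queue_family_index = my_queue_family_index;
        return 0;  // zero indicates success; non-zero is treated as an error code
    }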
+ +// -- + +// Override the default allocation callbacks (default uses the Vulkan runtime implementation) +extern void halide_vulkan_set_allocation_callbacks(const struct VkAllocationCallbacks *callbacks); + +// Access the current allocation callbacks +// -- may return nullptr, which indicates the default Vulkan runtime implementation is being used +extern const struct VkAllocationCallbacks *halide_vulkan_get_allocation_callbacks(void *user_context); + +// Access methods to assign/retrieve required layer names for the context +extern void halide_vulkan_set_layer_names(const char *n); +extern const char *halide_vulkan_get_layer_names(void *user_context); + +// Access methods to assign/retrieve required extension names for the context +extern void halide_vulkan_set_extension_names(const char *n); +extern const char *halide_vulkan_get_extension_names(void *user_context); + +// Access methods to assign/retrieve required device type names for the context (either "cpu", "gpu" (any), "discrete-gpu" (only), "virtual-gpu" (sw)) +extern void halide_vulkan_set_device_type(const char *n); +extern const char *halide_vulkan_get_device_type(void *user_context); + +// Access methods to assign/retrieve specific build options to the Vulkan runtime compiler +extern void halide_vulkan_set_build_options(const char *n); +extern const char *halide_vulkan_get_build_options(void *user_context); + +#ifdef __cplusplus } // End extern "C" #endif + +#endif // HALIDE_HALIDERUNTIMEVULKAN_H diff --git a/src/runtime/internal/block_allocator.h b/src/runtime/internal/block_allocator.h index 89824c9f7e63..3ff850e5b19f 100644 --- a/src/runtime/internal/block_allocator.h +++ b/src/runtime/internal/block_allocator.h @@ -1,10 +1,10 @@ #ifndef HALIDE_RUNTIME_BLOCK_ALLOCATOR_H #define HALIDE_RUNTIME_BLOCK_ALLOCATOR_H -#include "HalideRuntime.h" +#include "../HalideRuntime.h" +#include "../printer.h" #include "linked_list.h" #include "memory_resources.h" -#include "printer.h" #include "region_allocator.h" namespace Halide { @@ -42,9 +42,11 @@ class BlockAllocator { // Runtime configuration parameters to adjust the behaviour of the block allocator struct Config { size_t initial_capacity = 0; - size_t minimum_block_size = 0; - size_t maximum_block_size = 0; - size_t maximum_block_count = 0; + size_t maximum_pool_size = 0; //< Maximum number of bytes to allocate for the entire pool (including all blocks). Specified in bytes. Zero means no constraint + size_t minimum_block_size = 0; //< Minimum block size in bytes. Zero means no constraint. + size_t maximum_block_size = 0; //< Maximum block size in bytes. Zero means no constraint + size_t maximum_block_count = 0; //< Maximum number of blocks to allocate. Zero means no constraint + size_t nearest_multiple = 0; //< Always round up the requested region sizes to the given integer value.
Zero means no constraint }; // Factory methods for creation / destruction @@ -53,16 +55,19 @@ class BlockAllocator { // Public interface methods MemoryRegion *reserve(void *user_context, const MemoryRequest &request); - void reclaim(void *user_context, MemoryRegion *region); - bool collect(void *user_context); //< returns true if any blocks were removed - void release(void *user_context); - void destroy(void *user_context); + int release(void *user_context, MemoryRegion *region); //< unmark and cache the region for reuse + int reclaim(void *user_context, MemoryRegion *region); //< free the region and consolidate + int retain(void *user_context, MemoryRegion *region); //< retain the region and increase the usage count + bool collect(void *user_context); //< returns true if any blocks were removed + int release(void *user_context); + int destroy(void *user_context); // Access methods const MemoryAllocators &current_allocators() const; const Config &current_config() const; const Config &default_config() const; size_t block_count() const; + size_t pool_size() const; private: // Linked-list for storing the block resources @@ -78,7 +83,7 @@ class BlockAllocator { RegionAllocator *create_region_allocator(void *user_context, BlockResource *block); // Destroys the given region allocator and all associated memory regions - void destroy_region_allocator(void *user_context, RegionAllocator *region_allocator); + int destroy_region_allocator(void *user_context, RegionAllocator *region_allocator); // Reserves a block of memory for the requested size and returns the corresponding block entry, or nullptr on failure BlockEntry *reserve_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated); @@ -90,16 +95,16 @@ class BlockAllocator { BlockEntry *create_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated); // Releases the block entry from being used, and makes it available for further allocations - void release_block_entry(void *user_context, BlockEntry *block_entry); + int release_block_entry(void *user_context, BlockEntry *block_entry); // Destroys the block entry and removes it from the list - void destroy_block_entry(void *user_context, BlockEntry *block_entry); + int destroy_block_entry(void *user_context, BlockEntry *block_entry); // Invokes the allocation callback to allocate memory for the block region - void alloc_memory_block(void *user_context, BlockResource *block); + int alloc_memory_block(void *user_context, BlockResource *block); // Invokes the deallocation callback to free memory for the memory block - void free_memory_block(void *user_context, BlockResource *block); + int free_memory_block(void *user_context, BlockResource *block); // Returns a constrained size for the requested size based on config parameters size_t constrain_requested_size(size_t size) const; @@ -107,13 +112,16 @@ class BlockAllocator { // Returns true if the given block is compatible with the given properties bool is_compatible_block(const BlockResource *block, const MemoryProperties &properties) const; + // Returns true if the given block is suitable for the requested allocation + bool is_block_suitable_for_request(void *user_context, const BlockResource *block, const MemoryProperties &properties, size_t size, bool dedicated) const; + Config config; LinkedList block_list; MemoryAllocators allocators; };
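[Editor's note: the Config fields above act as soft limits checked at block-creation time. A sketch of how a caller might bound the pool; user_context and my_allocators (a MemoryAllocators value) are assumed to exist:]

    // Sketch: cap the pool at 64 MB spread across at most 16 blocks of >= 1 MB each.
    BlockAllocator::Config config;
    config.maximum_pool_size = 64 * 1024 * 1024;
    config.minimum_block_size = 1024 * 1024;
    config.maximum_block_count = 16;
    config.nearest_multiple = 32;  // round region sizes up to 32-byte multiples
    BlockAllocator *pool = BlockAllocator::create(user_context, config, my_allocators);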
BlockAllocator *BlockAllocator::create(void *user_context, const Config &cfg, const MemoryAllocators &allocators) { - halide_debug_assert(user_context, allocators.system.allocate != nullptr); + halide_abort_if_false(user_context, allocators.system.allocate != nullptr); BlockAllocator *result = reinterpret_cast<BlockAllocator *>( allocators.system.allocate(user_context, sizeof(BlockAllocator))); @@ -127,10 +135,10 @@ BlockAllocator *BlockAllocator::create(void *user_context, const Config &cfg, co } void BlockAllocator::destroy(void *user_context, BlockAllocator *instance) { - halide_debug_assert(user_context, instance != nullptr); + halide_abort_if_false(user_context, instance != nullptr); const MemoryAllocators &allocators = instance->allocators; instance->destroy(user_context); - halide_debug_assert(user_context, allocators.system.deallocate != nullptr); + halide_abort_if_false(user_context, allocators.system.deallocate != nullptr); allocators.system.deallocate(user_context, instance); } @@ -144,7 +152,7 @@ void BlockAllocator::initialize(void *user_context, const Config &cfg, const Mem } MemoryRegion *BlockAllocator::reserve(void *user_context, const MemoryRequest &r -#ifdef DEBUG_RUNTIME +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: Reserve (" << "user_context=" << (void *)(user_context) << " " << "offset=" << (uint32_t)request.offset << " " @@ -156,24 +164,23 @@ MemoryRegion *BlockAllocator::reserve(void *user_context, const MemoryRequest &r #endif BlockEntry *block_entry = reserve_block_entry(user_context, request.properties, request.size, request.dedicated); if (block_entry == nullptr) { - debug(user_context) << "BlockAllocator: Failed to allocate new empty block of requested size (" + error(user_context) << "BlockAllocator: Failed to allocate new empty block of requested size (" << (int32_t)(request.size) << " bytes)!\n"; return nullptr; } BlockResource *block = static_cast<BlockResource *>(block_entry->value); - halide_debug_assert(user_context, block != nullptr); - halide_debug_assert(user_context, block->allocator != nullptr); + halide_abort_if_false(user_context, block != nullptr); + halide_abort_if_false(user_context, block->allocator != nullptr); MemoryRegion *result = reserve_memory_region(user_context, block->allocator, request); if (result == nullptr) { // Unable to reserve region in an existing block ... create a new block and try again. - size_t actual_size = constrain_requested_size(request.size); - block_entry = create_block_entry(user_context, request.properties, actual_size, request.dedicated); + block_entry = create_block_entry(user_context, request.properties, request.size, request.dedicated); if (block_entry == nullptr) { - debug(user_context) << "BlockAllocator: Out of memory! Failed to allocate empty block of size (" - << (int32_t)(actual_size) << " bytes)!\n"; + error(user_context) << "BlockAllocator: Out of memory!
Failed to allocate empty block of size (" + << (int32_t)(request.size) << " bytes)!\n"; return nullptr; } @@ -187,13 +194,37 @@ MemoryRegion *BlockAllocator::reserve(void *user_context, const MemoryRequest &r return result; } -void BlockAllocator::reclaim(void *user_context, MemoryRegion *memory_region) { - halide_debug_assert(user_context, memory_region != nullptr); +int BlockAllocator::release(void *user_context, MemoryRegion *memory_region) { + if (memory_region == nullptr) { + return halide_error_code_internal_error; + } + RegionAllocator *allocator = RegionAllocator::find_allocator(user_context, memory_region); + if (allocator == nullptr) { + return halide_error_code_internal_error; + } + return allocator->release(user_context, memory_region); +} + +int BlockAllocator::reclaim(void *user_context, MemoryRegion *memory_region) { + if (memory_region == nullptr) { + return halide_error_code_internal_error; + } + RegionAllocator *allocator = RegionAllocator::find_allocator(user_context, memory_region); + if (allocator == nullptr) { + return halide_error_code_internal_error; + } + return allocator->reclaim(user_context, memory_region); +} + +int BlockAllocator::retain(void *user_context, MemoryRegion *memory_region) { + if (memory_region == nullptr) { + return halide_error_code_internal_error; + } RegionAllocator *allocator = RegionAllocator::find_allocator(user_context, memory_region); if (allocator == nullptr) { - return; + return halide_error_code_internal_error; } - allocator->reclaim(user_context, memory_region); + return allocator->retain(user_context, memory_region); } bool BlockAllocator::collect(void *user_context) { @@ -201,14 +232,26 @@ bool BlockAllocator::collect(void *user_context) { BlockEntry *block_entry = block_list.back(); while (block_entry != nullptr) { BlockEntry *prev_entry = block_entry->prev_ptr; - const BlockResource *block = static_cast<BlockResource *>(block_entry->value); if (block->allocator == nullptr) { block_entry = prev_entry; continue; } - block->allocator->collect(user_context); +#ifdef DEBUG_RUNTIME_INTERNAL + uint64_t reserved = block->reserved; +#endif + + bool collected = block->allocator->collect(user_context); + if (collected) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "Collected block (" + << "block=" << (void *)block << " " + << "reserved=" << (uint32_t)block->reserved << " " + << "recovered=" << (uint32_t)(reserved - block->reserved) << " " + << ")\n"; +#endif + } if (block->reserved == 0) { destroy_block_entry(user_context, block_entry); result = true; @@ -219,16 +262,17 @@ bool BlockAllocator::collect(void *user_context) { return result; } -void BlockAllocator::release(void *user_context) { +int BlockAllocator::release(void *user_context) { BlockEntry *block_entry = block_list.back(); while (block_entry != nullptr) { BlockEntry *prev_entry = block_entry->prev_ptr; release_block_entry(user_context, block_entry); block_entry = prev_entry; } + return 0; } -void BlockAllocator::destroy(void *user_context) { +int BlockAllocator::destroy(void *user_context) { BlockEntry *block_entry = block_list.back(); while (block_entry != nullptr) { BlockEntry *prev_entry = block_entry->prev_ptr; @@ -236,12 +280,13 @@ void BlockAllocator::destroy(void *user_context) { block_entry = prev_entry; } block_list.destroy(user_context); + return 0; }
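[Editor's note: the int-returning variants above make the region lifecycle checkable end to end. A sketch of the intended call sequence; pool, user_context, and request are assumed from the earlier configuration sketch:]

    // Sketch: lifecycle of a pooled region using the int-returning API.
    MemoryRegion *region = pool->reserve(user_context, request);  // allocate or reuse a region
    if (region != nullptr) {
        int err = pool->release(user_context, region);  // unmark and cache it for reuse ...
        // ... or call pool->reclaim(user_context, region) to free and consolidate instead
        if (err != 0) { /* propagate halide_error_code_internal_error */ }
    }
    pool->collect(user_context);  // destroys any blocks whose reservations dropped to zero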
MemoryRegion *BlockAllocator::reserve_memory_region(void *user_context, RegionAllocator *allocator, const MemoryRequest &request) { MemoryRegion *result = allocator->reserve(user_context, request); if (result == nullptr) { -#ifdef DEBUG_RUNTIME +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: Failed to allocate region of size (" << (int32_t)(request.size) << " bytes)!\n"; #endif @@ -254,51 +299,113 @@ MemoryRegion *BlockAllocator::reserve_memory_region(void *user_context, RegionAl return result; } -BlockAllocator::BlockEntry * -BlockAllocator::find_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated) { - BlockEntry *block_entry = nullptr; - for (block_entry = block_list.front(); block_entry != nullptr; block_entry = block_entry->next_ptr) { - - const BlockResource *block = static_cast<BlockResource *>(block_entry->value); - if (!is_compatible_block(block, properties)) { - continue; - } +bool BlockAllocator::is_block_suitable_for_request(void *user_context, const BlockResource *block, const MemoryProperties &properties, size_t size, bool dedicated) const { + if (!is_compatible_block(block, properties)) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockAllocator: skipping block ... incompatible properties!\n" + << " block_resource=" << (void *)block << "\n" + << " block_size=" << (uint32_t)block->memory.size << "\n" + << " block_reserved=" << (uint32_t)block->reserved << "\n" + << " block_usage=" << halide_memory_usage_name(block->memory.properties.usage) << "\n" + << " block_caching=" << halide_memory_caching_name(block->memory.properties.caching) << "\n" + << " block_visibility=" << halide_memory_visibility_name(block->memory.properties.visibility) << "\n"; + debug(user_context) << " request_size=" << (uint32_t)size << "\n" + << " request_usage=" << halide_memory_usage_name(properties.usage) << "\n" + << " request_caching=" << halide_memory_caching_name(properties.caching) << "\n" + << " request_visibility=" << halide_memory_visibility_name(properties.visibility) << "\n"; +#endif + // skip blocks that are using incompatible memory + return false; + } + if (dedicated && (block->reserved > 0)) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockAllocator: skipping block ... can't be used for dedicated allocation!\n" + << " block_resource=" << (void *)block << "\n" + << " block_size=" << (uint32_t)block->memory.size << "\n" + << " block_reserved=" << (uint32_t)block->reserved << "\n"; +#endif // skip blocks that can't be dedicated to a single allocation - if (dedicated && (block->reserved > 0)) { - continue; - } + return false; + + } else if (block->memory.dedicated && (block->reserved > 0)) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockAllocator: skipping block ... already dedicated to an allocation!\n" + << " block_resource=" << (void *)block << "\n" + << " block_size=" << (uint32_t)block->memory.size << "\n" + << " block_reserved=" << (uint32_t)block->reserved << "\n"; +#endif // skip dedicated blocks that are already allocated - if (block->memory.dedicated && (block->reserved > 0)) { - continue; - } + return false; + } + + size_t available = (block->memory.size - block->reserved); + if (available >= size) { + return true; + } + + return false; +} - size_t available = (block->memory.size - block->reserved); - if (available >= size) { -#ifdef DEBUG_RUNTIME - debug(user_context) << "BlockAllocator: find_block_entry (FOUND) (" - << "user_context=" << (void *)(user_context) << " " - << "block_entry=" << (void *)(block_entry) << " " - << "size=" << (uint32_t)size << " " - << "dedicated=" << (dedicated ?
"true" : "false") << " " - << "usage=" << halide_memory_usage_name(properties.usage) << " " - << "caching=" << halide_memory_caching_name(properties.caching) << " " - << "visibility=" << halide_memory_visibility_name(properties.visibility) << ") ...\n"; +BlockAllocator::BlockEntry * +BlockAllocator::find_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated) { + BlockEntry *block_entry = block_list.back(); + while (block_entry != nullptr) { + BlockEntry *prev_entry = block_entry->prev_ptr; + const BlockResource *block = static_cast(block_entry->value); + if (is_block_suitable_for_request(user_context, block, properties, size, dedicated)) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockAllocator: found suitable block ...\n" + << " user_context=" << (void *)(user_context) << "\n" + << " block_resource=" << (void *)block << "\n" + << " block_size=" << (uint32_t)block->memory.size << "\n" + << " block_reserved=" << (uint32_t)block->reserved << "\n" + << " request_size=" << (uint32_t)size << "\n" + << " dedicated=" << (dedicated ? "true" : "false") << "\n" + << " usage=" << halide_memory_usage_name(properties.usage) << "\n" + << " caching=" << halide_memory_caching_name(properties.caching) << "\n" + << " visibility=" << halide_memory_visibility_name(properties.visibility) << "\n"; #endif - break; + return block_entry; } + block_entry = prev_entry; } + if (block_entry == nullptr) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockAllocator: couldn't find suitable block!\n" + << " user_context=" << (void *)(user_context) << "\n" + << " request_size=" << (uint32_t)size << "\n" + << " dedicated=" << (dedicated ? "true" : "false") << "\n" + << " usage=" << halide_memory_usage_name(properties.usage) << "\n" + << " caching=" << halide_memory_caching_name(properties.caching) << "\n" + << " visibility=" << halide_memory_visibility_name(properties.visibility) << "\n"; +#endif + } return block_entry; } BlockAllocator::BlockEntry * BlockAllocator::reserve_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockAllocator: reserving block ... !\n" + << " requested_size=" << (uint32_t)size << "\n" + << " requested_is_dedicated=" << (dedicated ? "true" : "false") << "\n" + << " requested_usage=" << halide_memory_usage_name(properties.usage) << "\n" + << " requested_caching=" << halide_memory_caching_name(properties.caching) << "\n" + << " requested_visibility=" << halide_memory_visibility_name(properties.visibility) << "\n"; +#endif BlockEntry *block_entry = find_block_entry(user_context, properties, size, dedicated); if (block_entry == nullptr) { - size_t actual_size = constrain_requested_size(size); - block_entry = create_block_entry(user_context, properties, actual_size, dedicated); +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockAllocator: creating block ... !\n" + << " requested_size=" << (uint32_t)size << "\n" + << " requested_is_dedicated=" << (dedicated ? 
"true" : "false") << "\n" + << " requested_usage=" << halide_memory_usage_name(properties.usage) << "\n" + << " requested_caching=" << halide_memory_caching_name(properties.caching) << "\n" + << " requested_visibility=" << halide_memory_visibility_name(properties.visibility) << "\n"; +#endif + block_entry = create_block_entry(user_context, properties, size, dedicated); } if (block_entry) { @@ -312,12 +419,12 @@ BlockAllocator::reserve_block_entry(void *user_context, const MemoryProperties & RegionAllocator * BlockAllocator::create_region_allocator(void *user_context, BlockResource *block) { -#ifdef DEBUG_RUNTIME +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: Creating region allocator (" << "user_context=" << (void *)(user_context) << " " << "block_resource=" << (void *)(block) << ")...\n"; #endif - halide_debug_assert(user_context, block != nullptr); + halide_abort_if_false(user_context, block != nullptr); RegionAllocator *region_allocator = RegionAllocator::create( user_context, block, {allocators.system, allocators.region}); @@ -329,22 +436,29 @@ BlockAllocator::create_region_allocator(void *user_context, BlockResource *block return region_allocator; } -void BlockAllocator::destroy_region_allocator(void *user_context, RegionAllocator *region_allocator) { -#ifdef DEBUG_RUNTIME +int BlockAllocator::destroy_region_allocator(void *user_context, RegionAllocator *region_allocator) { +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "BlockAllocator: Destroying region allocator (" << "user_context=" << (void *)(user_context) << " " << "region_allocator=" << (void *)(region_allocator) << ")...\n"; #endif if (region_allocator == nullptr) { - return; + return 0; } - RegionAllocator::destroy(user_context, region_allocator); + return RegionAllocator::destroy(user_context, region_allocator); } BlockAllocator::BlockEntry * BlockAllocator::create_block_entry(void *user_context, const MemoryProperties &properties, size_t size, bool dedicated) { + if (config.maximum_pool_size && (pool_size() >= config.maximum_pool_size)) { + error(user_context) << "BlockAllocator: No free blocks found! Maximum pool size reached (" + << (int32_t)(config.maximum_pool_size) << " bytes or " + << (int32_t)(config.maximum_pool_size / (1024 * 1024)) << " MB)\n"; + return nullptr; + } + if (config.maximum_block_count && (block_count() >= config.maximum_block_count)) { - debug(user_context) << "BlockAllocator: No free blocks found! Maximum block count reached (" + error(user_context) << "BlockAllocator: No free blocks found! 
Maximum block count reached ("
 << (int32_t)(config.maximum_block_count) << ")!\n";
 return nullptr;
 }
@@ -355,7 +469,7 @@ BlockAllocator::create_block_entry(void *user_context, const MemoryProperties &p
 return nullptr;
 }
-#ifdef DEBUG_RUNTIME
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "BlockAllocator: Creating block entry ("
 << "block_entry=" << (void *)(block_entry) << " "
 << "block=" << (void *)(block_entry->value) << " "
@@ -363,8 +477,10 @@ BlockAllocator::create_block_entry(void *user_context, const MemoryProperties &p
 #endif
 BlockResource *block = static_cast<BlockResource *>(block_entry->value);
- block->memory.size = size;
+ block->memory.size = constrain_requested_size(size);
+ block->memory.handle = nullptr;
 block->memory.properties = properties;
+ block->memory.properties.nearest_multiple = max(config.nearest_multiple, properties.nearest_multiple);
 block->memory.dedicated = dedicated;
 block->reserved = 0;
 block->allocator = create_region_allocator(user_context, block);
@@ -372,20 +488,21 @@
 return block_entry;
 }
-void BlockAllocator::release_block_entry(void *user_context, BlockAllocator::BlockEntry *block_entry) {
-#ifdef DEBUG_RUNTIME
+int BlockAllocator::release_block_entry(void *user_context, BlockAllocator::BlockEntry *block_entry) {
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "BlockAllocator: Releasing block entry ("
 << "block_entry=" << (void *)(block_entry) << " "
 << "block=" << (void *)(block_entry->value) << ")...\n";
 #endif
 BlockResource *block = static_cast<BlockResource *>(block_entry->value);
 if (block->allocator) {
- block->allocator->release(user_context);
+ return block->allocator->release(user_context);
 }
+ return 0;
 }
-void BlockAllocator::destroy_block_entry(void *user_context, BlockAllocator::BlockEntry *block_entry) {
-#ifdef DEBUG_RUNTIME
+int BlockAllocator::destroy_block_entry(void *user_context, BlockAllocator::BlockEntry *block_entry) {
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "BlockAllocator: Destroying block entry ("
 << "block_entry=" << (void *)(block_entry) << " "
 << "block=" << (void *)(block_entry->value) << " "
@@ -398,31 +515,38 @@ void BlockAllocator::destroy_block_entry(void *user_context, BlockAllocator::Blo
 }
 free_memory_block(user_context, block);
 block_list.remove(user_context, block_entry);
+ return 0;
 }
-void BlockAllocator::alloc_memory_block(void *user_context, BlockResource *block) {
-#ifdef DEBUG_RUNTIME
+int BlockAllocator::alloc_memory_block(void *user_context, BlockResource *block) {
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "BlockAllocator: Allocating block (ptr=" << (void *)block << " allocator=" << (void *)allocators.block.allocate << ")...\n";
 #endif
- halide_debug_assert(user_context, allocators.block.allocate != nullptr);
+ halide_abort_if_false(user_context, allocators.block.allocate != nullptr);
 MemoryBlock *memory_block = &(block->memory);
 allocators.block.allocate(user_context, memory_block);
 block->reserved = 0;
+ return 0;
 }
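alloc_memory_block() above and free_memory_block() below do not allocate anything themselves; they only invoke the client-supplied MemoryBlockAllocatorFns callbacks and reset the bookkeeping fields. A hedged sketch of what such a callback pair might look like under the int-returning signatures this patch introduces (the example_* names and the malloc-based backing are hypothetical; a Vulkan backend would call vkAllocateMemory/vkFreeMemory instead):

    #include <cstddef>
    #include <cstdlib>

    struct ExampleMemoryBlock {  // stand-in for the runtime's MemoryBlock
        void *handle = nullptr;
        size_t size = 0;
    };

    static int example_allocate_block(void *user_context, ExampleMemoryBlock *block) {
        block->handle = malloc(block->size);         // backend-specific allocation goes here
        return (block->handle != nullptr) ? 0 : -1;  // zero signals success under the new contract
    }

    static int example_deallocate_block(void *user_context, ExampleMemoryBlock *block) {
        free(block->handle);
        block->handle = nullptr;  // mirrors free_memory_block(), which also clears the handle
        return 0;
    }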
-void BlockAllocator::free_memory_block(void *user_context, BlockResource *block) {
-#ifdef DEBUG_RUNTIME
+int BlockAllocator::free_memory_block(void *user_context, BlockResource *block) {
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "BlockAllocator: Deallocating block (ptr=" << (void *)block << " allocator=" << (void *)allocators.block.deallocate << ")...\n";
 #endif
- halide_debug_assert(user_context, allocators.block.deallocate != nullptr);
+ halide_abort_if_false(user_context, allocators.block.deallocate != nullptr);
 MemoryBlock *memory_block = &(block->memory);
 allocators.block.deallocate(user_context, memory_block);
+ memory_block->handle = nullptr;
 block->reserved = 0;
 block->memory.size = 0;
+ return 0;
 }
 size_t BlockAllocator::constrain_requested_size(size_t size) const {
 size_t actual_size = size;
+ if (config.nearest_multiple) {
+ actual_size = (((actual_size + config.nearest_multiple - 1) / config.nearest_multiple) * config.nearest_multiple);
+ }
 if (config.minimum_block_size) {
 actual_size = ((actual_size < config.minimum_block_size) ?
 config.minimum_block_size :
@@ -433,6 +557,7 @@ size_t BlockAllocator::constrain_requested_size(size_t size) const {
 config.maximum_block_size :
 actual_size);
 }
+
 return actual_size;
 }
@@ -475,6 +600,18 @@ size_t BlockAllocator::block_count() const {
 return block_list.size();
 }
+size_t BlockAllocator::pool_size() const {
+ size_t total_size = 0;
+ BlockEntry const *block_entry = nullptr;
+ for (block_entry = block_list.front(); block_entry != nullptr; block_entry = block_entry->next_ptr) {
+ const BlockResource *block = static_cast<BlockResource *>(block_entry->value);
+ if (block != nullptr) {
+ total_size += block->memory.size;
+ }
+ }
+ return total_size;
+}
+
 // --
 } // namespace Internal
diff --git a/src/runtime/internal/block_storage.h b/src/runtime/internal/block_storage.h
index 47f4e38a5111..c99709d9e4ae 100644
--- a/src/runtime/internal/block_storage.h
+++ b/src/runtime/internal/block_storage.h
@@ -1,7 +1,8 @@
 #ifndef HALIDE_RUNTIME_BLOCK_STORAGE_H
 #define HALIDE_RUNTIME_BLOCK_STORAGE_H
-#include "HalideRuntime.h"
+#include "../HalideRuntime.h"
+#include "../printer.h"
 #include "memory_resources.h"
 namespace Halide {
@@ -58,6 +59,8 @@ class BlockStorage {
 void destroy(void *user_context);
 bool empty() const;
+ bool full() const;
+ bool is_valid(size_t index) const;
 size_t stride() const;
 size_t size() const;
@@ -90,9 +93,9 @@ class BlockStorage {
BlockStorage::BlockStorage(void *user_context, const Config &cfg, const SystemMemoryAllocatorFns &sma)
 : config(cfg), allocator(sma) {
- halide_debug_assert(user_context, config.entry_size != 0);
- halide_debug_assert(user_context, allocator.allocate != nullptr);
- halide_debug_assert(user_context, allocator.deallocate != nullptr);
+ halide_abort_if_false(user_context, config.entry_size != 0);
+ halide_abort_if_false(user_context, allocator.allocate != nullptr);
+ halide_abort_if_false(user_context, allocator.deallocate != nullptr);
 if (config.minimum_capacity) {
 reserve(user_context, config.minimum_capacity);
 }
@@ -111,7 +114,7 @@ BlockStorage::~BlockStorage() {
 }
 void BlockStorage::destroy(void *user_context) {
- halide_debug_assert(user_context, allocator.deallocate != nullptr);
+ halide_abort_if_false(user_context, allocator.deallocate != nullptr);
 if (ptr != nullptr) {
 allocator.deallocate(user_context, ptr);
 }
@@ -175,12 +178,12 @@ void BlockStorage::append(void *user_context, const void *entry_ptr) {
 }
 void BlockStorage::pop_front(void *user_context) {
- halide_debug_assert(user_context, count > 0);
+ halide_abort_if_false(user_context, count > 0);
 remove(user_context, 0);
 }
 void BlockStorage::pop_back(void *user_context) {
- halide_debug_assert(user_context, count > 0);
+ halide_abort_if_false(user_context, count > 0);
 resize(user_context, size() - 1);
 }
@@ -212,7 +215,7 @@ void BlockStorage::resize(void *user_context, size_t entry_count, bool realloc)
 return;
 }
-#if DEBUG
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "BlockStorage: Resize ("
 << "requested_size=" <<
(int32_t)requested_size << " " << "current_size=" << (int32_t)current_size << " " @@ -248,22 +251,22 @@ void BlockStorage::remove(void *user_context, size_t index) { } void BlockStorage::remove(void *user_context, size_t index, size_t entry_count) { - halide_debug_assert(user_context, index < count); + halide_abort_if_false(user_context, index < count); const size_t last_index = size(); if (index < (last_index - entry_count)) { size_t dst_offset = index * config.entry_size; size_t src_offset = (index + entry_count) * config.entry_size; size_t bytes = (last_index - index - entry_count) * config.entry_size; -#if DEBUG - debug(0) << "BlockStorage: Remove (" - << "index=" << (int32_t)index << " " - << "entry_count=" << (int32_t)entry_count << " " - << "entry_size=" << (int32_t)config.entry_size << " " - << "last_index=" << (int32_t)last_index << " " - << "src_offset=" << (int32_t)src_offset << " " - << "dst_offset=" << (int32_t)dst_offset << " " - << "bytes=" << (int32_t)bytes << ")...\n"; +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockStorage: Remove (" + << "index=" << (int32_t)index << " " + << "entry_count=" << (int32_t)entry_count << " " + << "entry_size=" << (int32_t)config.entry_size << " " + << "last_index=" << (int32_t)last_index << " " + << "src_offset=" << (int32_t)src_offset << " " + << "dst_offset=" << (int32_t)dst_offset << " " + << "bytes=" << (int32_t)bytes << ")...\n"; #endif void *dst_ptr = offset_address(ptr, dst_offset); void *src_ptr = offset_address(ptr, src_offset); @@ -273,21 +276,21 @@ void BlockStorage::remove(void *user_context, size_t index, size_t entry_count) } void BlockStorage::replace(void *user_context, size_t index, const void *array, size_t array_size) { - halide_debug_assert(user_context, index < count); + halide_abort_if_false(user_context, index < count); size_t offset = index * config.entry_size; size_t remaining = count - index; #if DEBUG - debug(0) << "BlockStorage: Replace (" - << "index=" << (int32_t)index << " " - << "array_size=" << (int32_t)array_size << " " - << "entry_size=" << (int32_t)config.entry_size << " " - << "offset=" << (int32_t)offset << " " - << "remaining=" << (int32_t)remaining << " " - << "capacity=" << (int32_t)capacity << ")...\n"; + debug(user_context) << "BlockStorage: Replace (" + << "index=" << (int32_t)index << " " + << "array_size=" << (int32_t)array_size << " " + << "entry_size=" << (int32_t)config.entry_size << " " + << "offset=" << (int32_t)offset << " " + << "remaining=" << (int32_t)remaining << " " + << "capacity=" << (int32_t)capacity << ")...\n"; #endif - halide_debug_assert(user_context, remaining > 0); + halide_abort_if_false(user_context, remaining > 0); size_t copy_count = min(remaining, array_size); void *dst_ptr = offset_address(ptr, offset); memcpy(dst_ptr, array, copy_count * config.entry_size); @@ -295,7 +298,7 @@ void BlockStorage::replace(void *user_context, size_t index, const void *array, } void BlockStorage::insert(void *user_context, size_t index, const void *array, size_t array_size) { - halide_debug_assert(user_context, index <= count); + halide_abort_if_false(user_context, index <= count); const size_t last_index = size(); resize(user_context, last_index + array_size); if (index < last_index) { @@ -322,6 +325,14 @@ bool BlockStorage::empty() const { return count == 0; } +bool BlockStorage::full() const { + return (count >= capacity); +} + +bool BlockStorage::is_valid(size_t index) const { + return (index < capacity); +} + size_t BlockStorage::size() const { return count; } @@ -331,12 
+342,12 @@ size_t BlockStorage::stride() const { } void *BlockStorage::operator[](size_t index) { - halide_debug_assert(nullptr, index < capacity); + halide_abort_if_false(nullptr, index < capacity); return offset_address(ptr, index * config.entry_size); } const void *BlockStorage::operator[](size_t index) const { - halide_debug_assert(nullptr, index < capacity); + halide_abort_if_false(nullptr, index < capacity); return offset_address(ptr, index * config.entry_size); } @@ -345,12 +356,12 @@ void *BlockStorage::data() { } void *BlockStorage::front() { - halide_debug_assert(nullptr, count > 0); + halide_abort_if_false(nullptr, count > 0); return ptr; } void *BlockStorage::back() { - halide_debug_assert(nullptr, count > 0); + halide_abort_if_false(nullptr, count > 0); size_t index = count - 1; return offset_address(ptr, index * config.entry_size); } @@ -360,37 +371,37 @@ const void *BlockStorage::data() const { } const void *BlockStorage::front() const { - halide_debug_assert(nullptr, count > 0); + halide_abort_if_false(nullptr, count > 0); return ptr; } const void *BlockStorage::back() const { - halide_debug_assert(nullptr, count > 0); + halide_abort_if_false(nullptr, count > 0); size_t index = count - 1; return offset_address(ptr, index * config.entry_size); } void BlockStorage::allocate(void *user_context, size_t new_capacity) { if (new_capacity != capacity) { - halide_debug_assert(user_context, allocator.allocate != nullptr); + halide_abort_if_false(user_context, allocator.allocate != nullptr); size_t requested_bytes = new_capacity * config.entry_size; size_t block_size = max(config.block_size, config.entry_size); size_t block_count = (requested_bytes / block_size); block_count += (requested_bytes % block_size) ? 1 : 0; size_t alloc_size = block_count * block_size; -#if DEBUG - debug(0) << "BlockStorage: Allocating (" - << "requested_bytes=" << (int32_t)requested_bytes << " " - << "block_size=" << (int32_t)block_size << " " - << "block_count=" << (int32_t)block_count << " " - << "alloc_size=" << (int32_t)alloc_size << ") ...\n"; +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "BlockStorage: Allocating (" + << "requested_bytes=" << (int32_t)requested_bytes << " " + << "block_size=" << (int32_t)block_size << " " + << "block_count=" << (int32_t)block_count << " " + << "alloc_size=" << (int32_t)alloc_size << ") ...\n"; #endif void *new_ptr = alloc_size ? 
allocator.allocate(user_context, alloc_size) : nullptr; if (count != 0 && ptr != nullptr && new_ptr != nullptr) { memcpy(new_ptr, ptr, count * config.entry_size); } if (ptr != nullptr) { - halide_debug_assert(user_context, allocator.deallocate != nullptr); + halide_abort_if_false(user_context, allocator.deallocate != nullptr); allocator.deallocate(user_context, ptr); } capacity = new_capacity; diff --git a/src/runtime/internal/linked_list.h b/src/runtime/internal/linked_list.h index 87e564082157..2e3193b659b7 100644 --- a/src/runtime/internal/linked_list.h +++ b/src/runtime/internal/linked_list.h @@ -1,7 +1,7 @@ #ifndef HALIDE_RUNTIME_LINKED_LIST_H #define HALIDE_RUNTIME_LINKED_LIST_H -#include "HalideRuntime.h" +#include "../HalideRuntime.h" #include "memory_arena.h" namespace Halide { diff --git a/src/runtime/internal/memory_arena.h b/src/runtime/internal/memory_arena.h index e4b9aa7d4e86..9c069b47f687 100644 --- a/src/runtime/internal/memory_arena.h +++ b/src/runtime/internal/memory_arena.h @@ -1,7 +1,7 @@ #ifndef HALIDE_RUNTIME_MEMORY_ARENA_H #define HALIDE_RUNTIME_MEMORY_ARENA_H -#include "HalideRuntime.h" +#include "../HalideRuntime.h" #include "block_storage.h" namespace Halide { @@ -271,7 +271,7 @@ void *MemoryArena::create_entry(void *user_context, Block *block, uint32_t index void *entry_ptr = lookup_entry(user_context, block, index); block->free_index = block->indices[index]; block->status[index] = AllocationStatus::InUse; -#if DEBUG_RUNTIME +#if DEBUG_RUNTIME_INTERNAL memset(entry_ptr, 0, config.entry_size); #endif return entry_ptr; diff --git a/src/runtime/internal/memory_resources.h b/src/runtime/internal/memory_resources.h index bd3d4170f3fe..e30afb0dd4ea 100644 --- a/src/runtime/internal/memory_resources.h +++ b/src/runtime/internal/memory_resources.h @@ -1,7 +1,7 @@ #ifndef HALIDE_RUNTIME_MEMORY_RESOURCES_H #define HALIDE_RUNTIME_MEMORY_RESOURCES_H -#include "HalideRuntime.h" +#include "../HalideRuntime.h" namespace Halide { namespace Runtime { @@ -57,6 +57,8 @@ struct MemoryProperties { MemoryVisibility visibility = MemoryVisibility::InvalidVisibility; MemoryUsage usage = MemoryUsage::InvalidUsage; MemoryCaching caching = MemoryCaching::InvalidCaching; + size_t alignment = 0; //< required alignment of allocations (zero for no constraint) + size_t nearest_multiple = 0; //< require the allocation size to round up to the nearest multiple (zero means no rounding) }; // Client-facing struct for exchanging memory block allocation requests @@ -67,12 +69,20 @@ struct MemoryBlock { MemoryProperties properties; //< properties for the allocated block }; +// Client-facing struct for specifying a range of a memory region (eg for crops) +struct MemoryRange { + size_t head_offset = 0; //< byte offset from start of region + size_t tail_offset = 0; //< byte offset from end of region +}; + // Client-facing struct for exchanging memory region allocation requests struct MemoryRegion { - void *handle = nullptr; //< client data storing native handle (managed by alloc_block_region/free_block_region) + void *handle = nullptr; //< client data storing native handle (managed by alloc_block_region/free_block_region) or a pointer to region owning allocation size_t offset = 0; //< offset from base address in block (in bytes) size_t size = 0; //< allocated size (in bytes) + MemoryRange range; //< optional range (e.g. 
for handling crops, etc)
 bool dedicated = false; //< flag indicating whether allocation is one dedicated resource (or split/shared into other resources)
+ bool is_owner = true; //< flag indicating whether allocation is owned by this region, in which case handle is a native handle. Otherwise handle points to owning region of allocation.
 MemoryProperties properties; //< properties for the allocated region
 };
@@ -101,19 +111,33 @@ struct BlockResource {
 // -- Note: first field must MemoryRegion
 struct BlockRegion {
 MemoryRegion memory; //< memory info for the allocated region
+ uint32_t usage_count = 0; //< number of active clients using region
 AllocationStatus status = AllocationStatus::InvalidStatus; //< allocation status indicator
 BlockRegion *next_ptr = nullptr; //< pointer to next block region in linked list
 BlockRegion *prev_ptr = nullptr; //< pointer to prev block region in linked list
 BlockResource *block_ptr = nullptr; //< pointer to parent block resource
 };
+// Returns true if given byte alignment is a power of two
+ALWAYS_INLINE bool is_power_of_two_alignment(size_t x) {
+ return (x & (x - 1)) == 0;
+}
+
 // Returns an aligned byte offset to adjust the given offset based on alignment constraints
 // -- Alignment must be power of two!
 ALWAYS_INLINE size_t aligned_offset(size_t offset, size_t alignment) {
+ halide_abort_if_false(nullptr, is_power_of_two_alignment(alignment));
 return (offset + (alignment - 1)) & ~(alignment - 1);
 }
-// Returns a padded size to accomodate an adjusted offset due to alignment constraints
+// Returns a suitable alignment that is an integer multiple of the required
+// alignment and no smaller than the requested alignment
+ALWAYS_INLINE size_t conform_alignment(size_t requested, size_t required) {
+ size_t alignment = max(requested, required);
+ return ((required > 0) && (alignment > required)) ? (required * ((alignment / required) + 1)) : alignment;
+}
+
+// Returns a padded size to accommodate an adjusted offset due to alignment constraints
 // -- Alignment must be power of two!
 ALWAYS_INLINE size_t aligned_size(size_t offset, size_t size, size_t alignment) {
 size_t actual_offset = aligned_offset(offset, alignment);
@@ -122,6 +146,19 @@ ALWAYS_INLINE size_t aligned_size(size_t offset, size_t size, size_t alignment)
 return actual_size;
 }
+// Returns a padded size to accommodate an adjusted offset due to alignment constraints rounded up to the nearest multiple
+// -- Alignment must be power of two!
+ALWAYS_INLINE size_t conform_size(size_t offset, size_t size, size_t alignment, size_t nearest_multiple) {
+ size_t adjusted_size = aligned_size(offset, size, alignment);
+ adjusted_size = (alignment > adjusted_size) ? alignment : adjusted_size;
+ if (nearest_multiple > 0) {
+ size_t rounded_size = (((adjusted_size + nearest_multiple - 1) / nearest_multiple) * nearest_multiple);
+ return rounded_size;
+ } else {
+ return adjusted_size;
+ }
+}
+
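The helpers above are the core of the new sizing logic: aligned_offset() uses the standard power-of-two mask trick (now guarded by an explicit power-of-two check), and conform_size() rounds the aligned size up to a device-required multiple. A small illustrative walk-through with concrete numbers (assumed values, not taken from the patch):

    #include <cassert>
    #include <cstddef>

    static size_t example_aligned_offset(size_t offset, size_t alignment) {
        return (offset + (alignment - 1)) & ~(alignment - 1);  // same mask trick as aligned_offset()
    }

    int main() {
        assert(example_aligned_offset(13, 16) == 16);  // 13 rounds up to the next 16-byte boundary
        assert(example_aligned_offset(32, 16) == 32);  // already-aligned offsets pass through
        // conform_size(offset=13, size=20, alignment=16, nearest_multiple=32):
        //   aligned_size() pads by (16 - 13) = 3 bytes -> 23 bytes
        //   23 is then rounded up to the nearest multiple of 32 -> 32 bytes
        return 0;
    }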
// Clamps the given value to be within the [min_value, max_value] range
ALWAYS_INLINE size_t clamped_size(size_t value, size_t min_value, size_t max_value) {
 size_t result = (value < min_value) ? min_value : value;
@@ -163,16 +200,16 @@ struct HalideSystemAllocatorFns {
 DeallocateSystemFn deallocate = halide_free;
 };
-typedef void (*AllocateBlockFn)(void *, MemoryBlock *);
-typedef void (*DeallocateBlockFn)(void *, MemoryBlock *);
+typedef int (*AllocateBlockFn)(void *, MemoryBlock *);
+typedef int (*DeallocateBlockFn)(void *, MemoryBlock *);
 struct MemoryBlockAllocatorFns {
 AllocateBlockFn allocate = nullptr;
 DeallocateBlockFn deallocate = nullptr;
 };
-typedef void (*AllocateRegionFn)(void *, MemoryRegion *);
-typedef void (*DeallocateRegionFn)(void *, MemoryRegion *);
+typedef int (*AllocateRegionFn)(void *, MemoryRegion *);
+typedef int (*DeallocateRegionFn)(void *, MemoryRegion *);
 struct MemoryRegionAllocatorFns {
 AllocateRegionFn allocate = nullptr;
diff --git a/src/runtime/internal/pointer_table.h b/src/runtime/internal/pointer_table.h
index 0277e780a08d..51f250ee2396 100644
--- a/src/runtime/internal/pointer_table.h
+++ b/src/runtime/internal/pointer_table.h
@@ -1,7 +1,7 @@
 #ifndef HALIDE_RUNTIME_POINTER_TABLE_H
 #define HALIDE_RUNTIME_POINTER_TABLE_H
-#include "HalideRuntime.h"
+#include "../HalideRuntime.h"
 #include "memory_resources.h"
 namespace Halide {
@@ -185,7 +185,7 @@ void PointerTable::resize(void *user_context, size_t entry_count, bool realloc)
 size_t actual_size = current_size;
 count = requested_size;
-#ifdef DEBUG_RUNTIME
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "PointerTable: Resize ("
 << "requested_size=" << (int32_t)requested_size << " "
 << "current_size=" << (int32_t)current_size << " "
@@ -235,7 +235,7 @@ void PointerTable::remove(void *user_context, size_t index, size_t entry_count)
 size_t src_offset = (index + entry_count) * sizeof(void *);
 size_t bytes = (last_index - index - entry_count) * sizeof(void *);
-#ifdef DEBUG_RUNTIME
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "PointerTable: Remove ("
 << "index=" << (int32_t)index << " "
 << "entry_count=" << (int32_t)entry_count << " "
@@ -254,8 +254,7 @@ void PointerTable::replace(void *user_context, size_t index, const void **array,
 size_t remaining = count - index;
 size_t copy_count = min(remaining, array_size);
-#ifdef DEBUG_RUNTIME
-
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "PointerTable: Replace ("
 << "index=" << (int32_t)index << " "
 << "array_size=" << (int32_t)array_size << " "
@@ -333,7 +332,7 @@ void PointerTable::allocate(void *user_context, size_t new_capacity) {
 halide_debug_assert(user_context, allocator.allocate != nullptr);
 size_t bytes = new_capacity * sizeof(void *);
-#ifdef DEBUG_RUNTIME
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "PointerTable: Allocating (bytes=" << (int32_t)bytes << " allocator=" << (void *)allocator.allocate << ")...\n";
 #endif
diff --git a/src/runtime/internal/region_allocator.h b/src/runtime/internal/region_allocator.h
index 71b9e6c52b27..0bf3765301d7 100644
--- a/src/runtime/internal/region_allocator.h
+++ b/src/runtime/internal/region_allocator.h
@@ -1,10 +1,10 @@
 #ifndef HALIDE_RUNTIME_REGION_ALLOCATOR_H
 #define HALIDE_RUNTIME_REGION_ALLOCATOR_H
-#include "HalideRuntime.h"
+#include "../HalideRuntime.h"
+#include "../printer.h"
 #include "memory_arena.h"
 #include "memory_resources.h"
-#include "printer.h"
 namespace Halide {
 namespace Runtime {
@@ -39,70 +39,83 @@ class RegionAllocator {
 // Factory methods for creation / destruction
 static RegionAllocator *create(void *user_context, BlockResource *block, const MemoryAllocators &ma);
- static void destroy(void *user_context, RegionAllocator *region_allocator);
+ static int destroy(void *user_context, RegionAllocator *region_allocator);
 // Returns the allocator class instance for the given allocation (or nullptr)
 static RegionAllocator *find_allocator(void *user_context, MemoryRegion *memory_region);
 // Public interface methods
 MemoryRegion *reserve(void *user_context, const MemoryRequest &request);
- void reclaim(void *user_context, MemoryRegion *memory_region);
- bool collect(void *user_context); //< returns true if any blocks were removed
- void release(void *user_context);
- void destroy(void *user_context);
+ int release(void *user_context, MemoryRegion *memory_region); //< unmark and cache the region for reuse
+ int reclaim(void *user_context, MemoryRegion *memory_region); //< free the region and consolidate
+ int retain(void *user_context, MemoryRegion *memory_region); //< retain the region and increase usage count
+ bool collect(void *user_context); //< returns true if any blocks were removed
+ int release(void *user_context);
+ int destroy(void *user_context);
 // Returns the currently managed block resource
 BlockResource *block_resource() const;
 private:
 // Initializes a new instance
- void initialize(void *user_context, BlockResource *block, const MemoryAllocators &ma);
+ int initialize(void *user_context, BlockResource *block, const MemoryAllocators &ma);
 // Search through allocated block regions (Best-Fit)
 BlockRegion *find_block_region(void *user_context, const MemoryRequest &request);
+ // Returns true if block region is unused and available
+ bool is_available(const BlockRegion *region) const;
+
 // Returns true if neighbouring block regions to the given region can be coalesced into one
- bool can_coalesce(BlockRegion *region);
+ bool can_coalesce(const BlockRegion *region) const;
 // Merges available neighbouring block regions into the given region
 BlockRegion *coalesce_block_regions(void *user_context, BlockRegion *region);
- // Returns true if the given region can be split to accomadate the given size
- bool can_split(BlockRegion *region, size_t size);
+ // Returns true if the given region can be split to accommodate the given size
+ bool can_split(const BlockRegion *region, size_t size) const;
- // Splits the given block region into a smaller region to accomadate the given size, followed by empty space for the remaining
+ // Splits the given block region into a smaller region to accommodate the given size, followed by empty space for the remaining
 BlockRegion *split_block_region(void *user_context, BlockRegion *region, size_t size, size_t alignment);
 // Creates a new block region and adds it to the region list
 BlockRegion *create_block_region(void *user_context, const MemoryProperties &properties, size_t offset, size_t size, bool dedicated);
 // Creates a new block region and adds it to the region list
- void destroy_block_region(void *user_context, BlockRegion *region);
+ int destroy_block_region(void *user_context, BlockRegion *region);
 // Invokes the allocation callback to allocate memory for the block region
- void alloc_block_region(void *user_context, BlockRegion *region);
+ int alloc_block_region(void *user_context, BlockRegion *region);
 // Releases a block region and leaves it in the list for further allocations
- void release_block_region(void *user_context, BlockRegion *region);
+ int release_block_region(void *user_context, BlockRegion *region);
 // Invokes the deallocation callback to free memory for the block region
- void free_block_region(void *user_context, BlockRegion *region);
+ int free_block_region(void *user_context, BlockRegion *region);
+
+ // Returns true if the given block region is the last region in the list
+ bool is_last_block_region(void *user_context, const BlockRegion *region) const;
 // Returns true if the given block region is compatible with the given properties
 bool is_compatible_block_region(const BlockRegion *region, const MemoryProperties &properties) const;
+ // Returns true if the given block region is suitable for the requested allocation
+ bool is_block_region_suitable_for_request(void *user_context, const BlockRegion *region, const MemoryRequest &request) const;
+
+ // Returns the number of active regions for the block
+ size_t region_count(void *user_context) const;
+
 BlockResource *block = nullptr;
 MemoryArena *arena = nullptr;
 MemoryAllocators allocators;
 };
RegionAllocator *RegionAllocator::create(void *user_context, BlockResource *block_resource, const MemoryAllocators &allocators) {
- halide_debug_assert(user_context, allocators.system.allocate != nullptr);
+ halide_abort_if_false(user_context, allocators.system.allocate != nullptr);
 RegionAllocator *result = reinterpret_cast<RegionAllocator *>(
 allocators.system.allocate(user_context, sizeof(RegionAllocator)));
 if (result == nullptr) {
- halide_error(user_context, "RegionAllocator: Failed to create instance! Out of memory!\n");
 return nullptr;
 }
@@ -110,32 +123,36 @@ RegionAllocator *RegionAllocator::create(void *user_context, BlockResource *bloc
 return result;
 }
-void RegionAllocator::destroy(void *user_context, RegionAllocator *instance) {
- halide_debug_assert(user_context, instance != nullptr);
+int RegionAllocator::destroy(void *user_context, RegionAllocator *instance) {
+ halide_abort_if_false(user_context, instance != nullptr);
 const MemoryAllocators &allocators = instance->allocators;
 instance->destroy(user_context);
- halide_debug_assert(user_context, allocators.system.deallocate != nullptr);
+ halide_abort_if_false(user_context, allocators.system.deallocate != nullptr);
 allocators.system.deallocate(user_context, instance);
+ return 0;
 }
-void RegionAllocator::initialize(void *user_context, BlockResource *mb, const MemoryAllocators &ma) {
+int RegionAllocator::initialize(void *user_context, BlockResource *mb, const MemoryAllocators &ma) {
 block = mb;
 allocators = ma;
 arena = MemoryArena::create(user_context, {sizeof(BlockRegion), MemoryArena::default_capacity, 0}, allocators.system);
- halide_debug_assert(user_context, arena != nullptr);
+ halide_abort_if_false(user_context, arena != nullptr);
 block->allocator = this;
 block->regions = create_block_region(
 user_context,
 block->memory.properties,
 0, block->memory.size,
 block->memory.dedicated);
+ return 0;
 }
MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &request) {
- halide_debug_assert(user_context, request.size > 0);
+ halide_abort_if_false(user_context, request.size > 0);
+ size_t actual_alignment = conform_alignment(request.alignment, block->memory.properties.alignment);
+ size_t actual_size = conform_size(request.offset, request.size, actual_alignment, block->memory.properties.nearest_multiple);
 size_t remaining = block->memory.size - block->reserved;
- if (remaining < request.size) {
-#ifdef DEBUG_RUNTIME
+ if (remaining < actual_size) {
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "RegionAllocator: Unable to reserve more memory from block "
 << "-- requested size (" << (int32_t)(request.size) << " bytes) "
 << "greater than available (" << (int32_t)(remaining) << " bytes)!\n";
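The interface above replaces the old void-returning entry points with int error codes and adds a per-region usage count. A hedged sketch of how a client might drive the new lifetime calls (example_region_lifetime and its parameters are hypothetical; the call sequence follows the header comments above):

    using Halide::Runtime::Internal::MemoryRegion;
    using Halide::Runtime::Internal::MemoryRequest;
    using Halide::Runtime::Internal::RegionAllocator;

    void example_region_lifetime(void *user_context, RegionAllocator *allocator, MemoryRequest &request) {
        MemoryRegion *region = allocator->reserve(user_context, request);  // carve a region out of the block
        allocator->retain(user_context, region);   // another client takes a reference (usage_count++)
        allocator->release(user_context, region);  // drop a reference; the region stays cached for reuse
        allocator->reclaim(user_context, region);  // give it back entirely; free and coalesce neighbours
    }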
@@ -145,7 +162,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &
 BlockRegion *block_region = find_block_region(user_context, request);
 if (block_region == nullptr) {
-#ifdef DEBUG_RUNTIME
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "RegionAllocator: Failed to locate region for requested size (" << (int32_t)(request.size) << " bytes)!\n";
 #endif
@@ -153,7 +170,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &
 }
 if (can_split(block_region, request.size)) {
-#ifdef DEBUG_RUNTIME
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") "
 << "to accomodate requested size (" << (int32_t)(request.size) << " bytes)!\n";
 #endif
@@ -164,75 +181,184 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &
 return reinterpret_cast<MemoryRegion *>(block_region);
 }
-void RegionAllocator::reclaim(void *user_context, MemoryRegion *memory_region) {
+int RegionAllocator::release(void *user_context, MemoryRegion *memory_region) {
+ BlockRegion *block_region = reinterpret_cast<BlockRegion *>(memory_region);
+ halide_abort_if_false(user_context, block_region != nullptr);
+ halide_abort_if_false(user_context, block_region->block_ptr == block);
+ if (block_region->usage_count > 0) {
+ block_region->usage_count--;
+ }
+ return release_block_region(user_context, block_region);
+}
+
+int RegionAllocator::reclaim(void *user_context, MemoryRegion *memory_region) {
 BlockRegion *block_region = reinterpret_cast<BlockRegion *>(memory_region);
- halide_debug_assert(user_context, block_region != nullptr);
- halide_debug_assert(user_context, block_region->block_ptr == block);
+ halide_abort_if_false(user_context, block_region != nullptr);
+ halide_abort_if_false(user_context, block_region->block_ptr == block);
+ if (block_region->usage_count > 0) {
+ block_region->usage_count--;
+ }
+ release_block_region(user_context, block_region);
 free_block_region(user_context, block_region);
 if (can_coalesce(block_region)) {
 block_region = coalesce_block_regions(user_context, block_region);
 }
+ return 0;
+}
+
+int RegionAllocator::retain(void *user_context, MemoryRegion *memory_region) {
+ BlockRegion *block_region = reinterpret_cast<BlockRegion *>(memory_region);
+ halide_abort_if_false(user_context, block_region != nullptr);
+ halide_abort_if_false(user_context, block_region->block_ptr == block);
+ block_region->usage_count++;
+ return 0;
 }
RegionAllocator *RegionAllocator::find_allocator(void *user_context, MemoryRegion *memory_region) {
 BlockRegion *block_region = reinterpret_cast<BlockRegion *>(memory_region);
- halide_debug_assert(user_context, block_region != nullptr);
- halide_debug_assert(user_context, block_region->block_ptr != nullptr);
+ if (block_region == nullptr) {
+ return nullptr;
+ }
+ if (block_region->block_ptr == nullptr) {
+ return nullptr;
+ }
 return block_region->block_ptr->allocator;
 }
-BlockRegion *RegionAllocator::find_block_region(void *user_context, const MemoryRequest &request) {
- BlockRegion *result = nullptr;
- for (BlockRegion *block_region = block->regions; block_region != nullptr; block_region = block_region->next_ptr) {
+bool RegionAllocator::is_last_block_region(void *user_context, const BlockRegion *region) const {
+ return ((region == nullptr) || (region == region->next_ptr) || (region->next_ptr == nullptr));
+}
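find_allocator() and the release/reclaim/retain entry points above all turn a client-facing MemoryRegion* back into the BlockRegion* that owns it with a plain reinterpret_cast. This only works because BlockRegion deliberately keeps its MemoryRegion as the first member (see the "first field must MemoryRegion" note in memory_resources.h), so both share a starting address. A simplified illustration of the idiom (stand-in types, not the patch's own):

    #include <cstddef>

    struct ExampleRegionInfo { size_t offset = 0, size = 0; };  // stand-in for MemoryRegion

    struct ExampleRegionNode {
        ExampleRegionInfo memory;  // must remain the first member for the cast below to hold
        ExampleRegionNode *next_ptr = nullptr;
    };

    static ExampleRegionNode *example_node_from_info(ExampleRegionInfo *info) {
        // Legal because 'memory' sits at offset zero of ExampleRegionNode
        return reinterpret_cast<ExampleRegionNode *>(info);
    }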
- if (block_region->status != AllocationStatus::Available) {
- continue;
- }
+bool RegionAllocator::is_block_region_suitable_for_request(void *user_context, const BlockRegion *region, const MemoryRequest &request) const {
+ if (!is_available(region)) {
+#ifdef DEBUG_RUNTIME_INTERNAL
+ debug(user_context) << "RegionAllocator: skipping block region ... not available! "
+ << " block_region=" << (void *)region << "\n";
+#endif
+ return false;
+ }
- // skip incompatible block regions for this request
- if (!is_compatible_block_region(block_region, request.properties)) {
- continue;
- }
+ // skip incompatible block regions for this request
+ if (!is_compatible_block_region(region, request.properties)) {
+#ifdef DEBUG_RUNTIME_INTERNAL
+ debug(user_context) << "RegionAllocator: skipping block region ... incompatible properties! "
+ << " block_region=" << (void *)region << "\n";
+#endif
+ return false;
+ }
- // is the requested size larger than the current region?
- if (request.size > block_region->memory.size) {
- continue;
- }
+ size_t actual_alignment = conform_alignment(request.alignment, block->memory.properties.alignment);
+ size_t actual_size = conform_size(region->memory.offset, request.size, actual_alignment, block->memory.properties.nearest_multiple);
- size_t actual_size = aligned_size(block_region->memory.offset, request.size, request.alignment);
+ // is the adjusted size larger than the current region?
+ if (actual_size > region->memory.size) {
+#ifdef DEBUG_RUNTIME_INTERNAL
+ debug(user_context) << "RegionAllocator: skipping block region ... not enough space for adjusted size! "
+ << " block_region=" << (void *)region << "\n";
+#endif
+ return false;
+ }
+
+ // will the adjusted size fit within the remaining unallocated space?
+ if ((actual_size + block->reserved) <= block->memory.size) {
+#ifdef DEBUG_RUNTIME_INTERNAL
+ debug(user_context) << "RegionAllocator: found suitable block region! "
+ << " block_region=" << (void *)region << "\n";
+#endif
+ return true; // you betcha
+ }
- // is the adjusted size larger than the current region?
- if (actual_size > block_region->memory.size) {
- continue;
+ return false;
+}
+
+BlockRegion *RegionAllocator::find_block_region(void *user_context, const MemoryRequest &request) {
+ BlockRegion *block_region = block->regions;
+ while (block_region != nullptr) {
+ if (is_block_region_suitable_for_request(user_context, block_region, request)) {
+#ifdef DEBUG_RUNTIME_INTERNAL
+ debug(user_context) << "RegionAllocator: found suitable region ...\n"
+ << " user_context=" << (void *)(user_context) << "\n"
+ << " block_resource=" << (void *)block << "\n"
+ << " block_size=" << (uint32_t)block->memory.size << "\n"
+ << " block_reserved=" << (uint32_t)block->reserved << "\n"
+ << " requested_size=" << (uint32_t)request.size << "\n"
+ << " requested_is_dedicated=" << (request.dedicated ? "true" : "false") << "\n"
+ << " requested_usage=" << halide_memory_usage_name(request.properties.usage) << "\n"
+ << " requested_caching=" << halide_memory_caching_name(request.properties.caching) << "\n"
+ << " requested_visibility=" << halide_memory_visibility_name(request.properties.visibility) << "\n";
+#endif
+ return block_region;
 }
- // will the adjusted size fit within the remaining unallocated space?
- if ((actual_size + block->reserved) < block->memory.size) {
- result = block_region; // best-fit!
+ if (is_last_block_region(user_context, block_region)) {
+ block_region = nullptr; // end of list ...
nothing found break; } + block_region = block_region->next_ptr; } - return result; + + if (block_region == nullptr) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: couldn't find suitable region!\n" + << " user_context=" << (void *)(user_context) << "\n" + << " requested_size=" << (uint32_t)request.size << "\n" + << " requested_is_dedicated=" << (request.dedicated ? "true" : "false") << "\n" + << " requested_usage=" << halide_memory_usage_name(request.properties.usage) << "\n" + << " requested_caching=" << halide_memory_caching_name(request.properties.caching) << "\n" + << " requested_visibility=" << halide_memory_visibility_name(request.properties.visibility) << "\n"; +#endif + } + + return block_region; } -bool RegionAllocator::can_coalesce(BlockRegion *block_region) { +bool RegionAllocator::is_available(const BlockRegion *block_region) const { if (block_region == nullptr) { return false; } - if (block_region->prev_ptr && (block_region->prev_ptr->status == AllocationStatus::Available)) { + if (block_region->usage_count > 0) { + return false; + } + if (block_region->status != AllocationStatus::Available) { + return false; + } + return true; +} + +bool RegionAllocator::can_coalesce(const BlockRegion *block_region) const { + if (!is_available(block_region)) { + return false; + } + if (is_available(block_region->prev_ptr)) { return true; } - if (block_region->next_ptr && (block_region->next_ptr->status == AllocationStatus::Available)) { + if (is_available(block_region->next_ptr)) { return true; } return false; } BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRegion *block_region) { - if (block_region->prev_ptr && (block_region->prev_ptr->status == AllocationStatus::Available)) { - BlockRegion *prev_region = block_region->prev_ptr; -#ifdef DEBUG_RUNTIME + if ((block_region->usage_count == 0) && (block_region->memory.handle != nullptr)) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "Freeing region (" + << "block_ptr=" << (void *)block_region->block_ptr << " " + << "block_region=" << (void *)block_region << " " + << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "block_reserved=" << (uint32_t)block->reserved << " " + << ")\n"; +#endif + halide_abort_if_false(user_context, allocators.region.deallocate != nullptr); + MemoryRegion *memory_region = &(block_region->memory); + allocators.region.deallocate(user_context, memory_region); + block_region->memory.handle = nullptr; + } + + BlockRegion *prev_region = block_region->prev_ptr; + if (is_available(prev_region) && (prev_region != block_region)) { + +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Coalescing " << "previous region (offset=" << (int32_t)prev_region->memory.offset << " size=" << (int32_t)(prev_region->memory.size) << " bytes) " << "into current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)\n!"; @@ -247,10 +373,10 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe block_region = prev_region; } - if (block_region->next_ptr && (block_region->next_ptr->status == AllocationStatus::Available)) { - BlockRegion *next_region = block_region->next_ptr; + BlockRegion *next_region = block_region->next_ptr; + if (is_available(next_region) && (next_region != block_region)) { -#ifdef DEBUG_RUNTIME +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Coalescing " << "next region (offset=" << 
(int32_t)next_region->memory.offset << " size=" << (int32_t)(next_region->memory.size) << " bytes) " << "into current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)!\n"; @@ -267,45 +393,71 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe return block_region; } -bool RegionAllocator::can_split(BlockRegion *block_region, size_t size) { - return (block_region && (block_region->memory.size > size)); +bool RegionAllocator::can_split(const BlockRegion *block_region, size_t size) const { + return (block_region && (block_region->memory.size > size) && (block_region->usage_count == 0)); } BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, size_t size, size_t alignment) { - size_t adjusted_size = aligned_size(block_region->memory.offset, size, alignment); - size_t adjusted_offset = aligned_offset(block_region->memory.offset, alignment); - size_t empty_offset = adjusted_offset + size; - size_t empty_size = block_region->memory.size - adjusted_size; + if ((block_region->usage_count == 0) && (block_region->memory.handle != nullptr)) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Split deallocate region (" + << "block_ptr=" << (void *)block_region->block_ptr << " " + << "block_region=" << (void *)block_region << " " + << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "block_reserved=" << (uint32_t)block_region->block_ptr->reserved << " " + << ")\n"; +#endif + halide_abort_if_false(user_context, allocators.region.deallocate != nullptr); + MemoryRegion *memory_region = &(block_region->memory); + allocators.region.deallocate(user_context, memory_region); + block_region->memory.handle = nullptr; + } -#ifdef DEBUG_RUNTIME + size_t actual_alignment = conform_alignment(alignment, block->memory.properties.alignment); + size_t actual_size = conform_size(block_region->memory.offset, size, actual_alignment, block->memory.properties.nearest_multiple); + size_t actual_offset = aligned_offset(block_region->memory.offset + size, actual_alignment); + size_t empty_size = block_region->memory.size - actual_size; + +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Conforming size and alignment \n" + << " requested_size=" << (uint32_t)size << "\n" + << " actual_size=" << (uint32_t)actual_size << "\n" + << " requested_alignment=" << (uint32_t)alignment << " " + << " required_alignment=" << (uint32_t)block->memory.properties.alignment << " " + << " actual_alignment=" << (uint32_t)actual_alignment << ")\n"; +#endif + +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Splitting " << "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) " - << "to create empty region (offset=" << (int32_t)empty_offset << " size=" << (int32_t)(empty_size) << " bytes)!\n"; + << "to create empty region (offset=" << (int32_t)actual_offset << " size=" << (int32_t)(empty_size) << " bytes)!\n"; #endif BlockRegion *next_region = block_region->next_ptr; BlockRegion *empty_region = create_block_region(user_context, block_region->memory.properties, - empty_offset, empty_size, + actual_offset, empty_size, block_region->memory.dedicated); - halide_debug_assert(user_context, empty_region != nullptr); + halide_abort_if_false(user_context, empty_region != nullptr); empty_region->next_ptr = next_region; if (next_region) { 
 next_region->prev_ptr = empty_region;
 }
+ empty_region->prev_ptr = block_region;
 block_region->next_ptr = empty_region;
- block_region->memory.size = size;
+ block_region->memory.size -= empty_size;
 return empty_region;
 }
BlockRegion *RegionAllocator::create_block_region(void *user_context, const MemoryProperties &properties, size_t offset, size_t size, bool dedicated) {
-#ifdef DEBUG_RUNTIME
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "RegionAllocator: Creating block region ("
 << "user_context=" << (void *)(user_context) << " "
 << "offset=" << (uint32_t)offset << " "
 << "size=" << (uint32_t)size << " "
+ << "alignment=" << (uint32_t)properties.alignment << " "
 << "dedicated=" << (dedicated ? "true" : "false") << " "
 << "usage=" << halide_memory_usage_name(properties.usage) << " "
 << "caching=" << halide_memory_caching_name(properties.caching) << " "
@@ -313,112 +465,224 @@ BlockRegion *RegionAllocator::create_block_region(void *user_context, const Memo
 #endif
 BlockRegion *block_region = static_cast<BlockRegion *>(arena->reserve(user_context, true));
-
 if (block_region == nullptr) {
 error(user_context) << "RegionAllocator: Failed to allocate new block region!\n";
 return nullptr;
 }
-#ifdef DEBUG_RUNTIME
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "RegionAllocator: Added block region ("
 << "user_context=" << (void *)(user_context) << " "
 << "block_region=" << (void *)(block_region) << ") ...\n";
 #endif
+ block_region->memory.handle = nullptr;
 block_region->memory.offset = offset;
 block_region->memory.size = size;
 block_region->memory.properties = properties;
 block_region->memory.dedicated = dedicated;
 block_region->status = AllocationStatus::Available;
 block_region->block_ptr = block;
+ block_region->usage_count = 0;
+
+#ifdef DEBUG_RUNTIME_INTERNAL
+ debug(user_context) << "Creating region ("
+ << "block_ptr=" << (void *)block_region->block_ptr << " "
+ << "block_region=" << (void *)block_region << " "
+ << "memory_size=" << (uint32_t)(block_region->memory.size) << " "
+ << ")\n";
+#endif
+
 return block_region;
 }
-void RegionAllocator::release_block_region(void *user_context, BlockRegion *block_region) {
-#ifdef DEBUG_RUNTIME
+int RegionAllocator::release_block_region(void *user_context, BlockRegion *block_region) {
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "RegionAllocator: Releasing block region ("
 << "user_context=" << (void *)(user_context) << " "
 << "block_region=" << (void *)(block_region) << ") ...\n";
 #endif
+ if (block_region == nullptr) {
+ return 0;
+ }
+
+ if (block_region->usage_count > 0) {
+ return 0;
+ }
+
+ if (block_region->status != AllocationStatus::Available) {
+
+#ifdef DEBUG_RUNTIME_INTERNAL
+ debug(user_context) << "Releasing region ("
+ << "block_ptr=" << (void *)block_region->block_ptr << " "
+ << "block_region=" << (void *)block_region << " "
+ << "memory_size=" << (uint32_t)(block_region->memory.size) << " "
+ << "block_reserved=" << (uint32_t)(block->reserved - block_region->memory.size) << " "
+ << ")\n";
+#endif
+
+ block->reserved -= block_region->memory.size;
+ }
 block_region->status = AllocationStatus::Available;
+ return 0;
 }
-void RegionAllocator::destroy_block_region(void *user_context, BlockRegion *block_region) {
-#ifdef DEBUG_RUNTIME
+int RegionAllocator::destroy_block_region(void *user_context, BlockRegion *block_region) {
+#ifdef DEBUG_RUNTIME_INTERNAL
 debug(user_context) << "RegionAllocator: Destroying block region ("
 << "user_context=" << (void *)(user_context) << " "
 << "block_region=" << (void *)(block_region) << ") ...\n";
#endif + block_region->usage_count = 0; + release_block_region(user_context, block_region); free_block_region(user_context, block_region); arena->reclaim(user_context, block_region); + return 0; } -void RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_region) { -#ifdef DEBUG_RUNTIME - debug(user_context) << "RegionAllocator: Allocating region (size=" << (int32_t)(block_region->memory.size) << ", offset=" << (int32_t)block_region->memory.offset << ")!\n"; +int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_region) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "RegionAllocator: Allocating region (user_context=" << (void *)(user_context) + << " size=" << (int32_t)(block_region->memory.size) + << " offset=" << (int32_t)block_region->memory.offset << ")!\n"; #endif - halide_debug_assert(user_context, allocators.region.allocate != nullptr); - halide_debug_assert(user_context, block_region->status == AllocationStatus::Available); + halide_abort_if_false(user_context, allocators.region.allocate != nullptr); + halide_abort_if_false(user_context, block_region->status == AllocationStatus::Available); + int error_code = 0; MemoryRegion *memory_region = &(block_region->memory); - allocators.region.allocate(user_context, memory_region); + if (memory_region->handle == nullptr) { + error_code = allocators.region.allocate(user_context, memory_region); + memory_region->is_owner = true; + +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "Allocating region (" + << "block_ptr=" << (void *)block_region->block_ptr << " " + << "block_region=" << (void *)block_region << " " + << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "block_reserved=" << (uint32_t)block->reserved << " " + << ")\n"; +#endif + + } else { + +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "Re-using region (" + << "block_ptr=" << (void *)block_region->block_ptr << " " + << "block_region=" << (void *)block_region << " " + << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " " + << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "block_reserved=" << (uint32_t)block->reserved << " " + << ")\n"; +#endif + } block_region->status = block_region->memory.dedicated ? 
AllocationStatus::Dedicated : AllocationStatus::InUse; block->reserved += block_region->memory.size; + return error_code; } -void RegionAllocator::free_block_region(void *user_context, BlockRegion *block_region) { -#ifdef DEBUG_RUNTIME +int RegionAllocator::free_block_region(void *user_context, BlockRegion *block_region) { +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Freeing block region (" << "user_context=" << (void *)(user_context) << " " - << "block_region=" << (void *)(block_region) << ") ...\n"; + << "block_region=" << (void *)(block_region) << " " + << "status=" << (uint32_t)block_region->status << " " + << "usage_count=" << (uint32_t)block_region->usage_count << ") ...\n"; +#endif + if ((block_region->usage_count == 0) && (block_region->memory.handle != nullptr)) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << "Freeing region (" + << "block_ptr=" << (void *)block_region->block_ptr << " " + << "block_region=" << (void *)block_region << " " + << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << "block_reserved=" << (uint32_t)block->reserved << " " + << ")\n"; #endif - if ((block_region->status == AllocationStatus::InUse) || - (block_region->status == AllocationStatus::Dedicated)) { - debug(user_context) << "RegionAllocator: Deallocating region (size=" << (int32_t)(block_region->memory.size) << ", offset=" << (int32_t)block_region->memory.offset << ")!\n"; - halide_debug_assert(user_context, allocators.region.deallocate != nullptr); + halide_abort_if_false(user_context, allocators.region.deallocate != nullptr); MemoryRegion *memory_region = &(block_region->memory); allocators.region.deallocate(user_context, memory_region); - block->reserved -= block_region->memory.size; block_region->memory.size = 0; + block_region->memory.offset = 0; + block_region->memory.handle = nullptr; } + block_region->usage_count = 0; block_region->status = AllocationStatus::Available; + return 0; } -void RegionAllocator::release(void *user_context) { -#ifdef DEBUG_RUNTIME +int RegionAllocator::release(void *user_context) { +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Releasing all regions (" << "user_context=" << (void *)(user_context) << ") ...\n"; #endif - for (BlockRegion *block_region = block->regions; block_region != nullptr; block_region = block_region->next_ptr) { + + BlockRegion *block_region = block->regions; + while (block_region != nullptr) { release_block_region(user_context, block_region); + if (is_last_block_region(user_context, block_region)) { + break; + } + block_region = block_region->next_ptr; } + return 0; } bool RegionAllocator::collect(void *user_context) { -#ifdef DEBUG_RUNTIME +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Collecting free block regions (" << "user_context=" << (void *)(user_context) << ") ...\n"; + + uint32_t count = 0; + uint64_t reserved = block->reserved; + debug(user_context) << " collecting unused regions (" + << "block_ptr=" << (void *)block << " " + << "block_reserved=" << (uint32_t)block->reserved << " " + << ")\n"; +#endif + + bool has_collected = false; + BlockRegion *block_region = block->regions; + while (block_region != nullptr) { + if (can_coalesce(block_region)) { +#ifdef DEBUG_RUNTIME_INTERNAL + count++; + debug(user_context) << " collecting region (" + << "block_ptr=" << (void *)block_region->block_ptr << " " + << "block_region=" << (void *)block_region << " " + << "memory_size=" << (uint32_t)(block_region->memory.size) << " " + << 
"block_reserved=" << (uint32_t)block->reserved << " " + << ")\n"; #endif - bool result = false; - for (BlockRegion *block_region = block->regions; block_region != nullptr; block_region = block_region->next_ptr) { - if (block_region->status == AllocationStatus::Available) { - if (can_coalesce(block_region)) { - block_region = coalesce_block_regions(user_context, block_region); - result = true; - } + block_region = coalesce_block_regions(user_context, block_region); + has_collected = true; } + if (is_last_block_region(user_context, block_region)) { + break; + } + block_region = block_region->next_ptr; } - return result; + + if (has_collected) { +#ifdef DEBUG_RUNTIME_INTERNAL + debug(user_context) << " collected unused regions (" + << "block_ptr=" << (void *)block << " " + << "region_count=" << (uint32_t)count << " " + << "collected=" << (uint32_t)(reserved - block->reserved) << " " + << ")\n"; +#endif + } + return has_collected; } -void RegionAllocator::destroy(void *user_context) { -#ifdef DEBUG_RUNTIME +int RegionAllocator::destroy(void *user_context) { +#ifdef DEBUG_RUNTIME_INTERNAL debug(user_context) << "RegionAllocator: Destroying all block regions (" << "user_context=" << (void *)(user_context) << ") ...\n"; #endif for (BlockRegion *block_region = block->regions; block_region != nullptr;) { - if (block_region->next_ptr == nullptr) { + if (is_last_block_region(user_context, block_region)) { destroy_block_region(user_context, block_region); block_region = nullptr; } else { @@ -432,6 +696,7 @@ void RegionAllocator::destroy(void *user_context) { block->allocator = nullptr; MemoryArena::destroy(user_context, arena); arena = nullptr; + return 0; } bool RegionAllocator::is_compatible_block_region(const BlockRegion *block_region, const MemoryProperties &properties) const { @@ -456,6 +721,17 @@ bool RegionAllocator::is_compatible_block_region(const BlockRegion *block_region return true; } +size_t RegionAllocator::region_count(void *user_context) const { + if (block == nullptr) { + return 0; + } + size_t count = 0; + for (BlockRegion const *region = block->regions; !is_last_block_region(user_context, region); region = region->next_ptr) { + ++count; + } + return count; +} + BlockResource *RegionAllocator::block_resource() const { return block; } diff --git a/src/runtime/internal/string_storage.h b/src/runtime/internal/string_storage.h index e32b0459d2b6..30fef3077628 100644 --- a/src/runtime/internal/string_storage.h +++ b/src/runtime/internal/string_storage.h @@ -1,7 +1,7 @@ #ifndef HALIDE_RUNTIME_STRING_STORAGE_H #define HALIDE_RUNTIME_STRING_STORAGE_H -#include "HalideRuntime.h" +#include "../HalideRuntime.h" #include "block_storage.h" namespace Halide { @@ -68,6 +68,13 @@ struct StringUtils { } return size_t(ptr - str); } + + static size_t copy_up_to(char *dst, const char *src, size_t max_chars) { + size_t length = count_length(src, max_chars); + memcpy(dst, src, length); + dst[length] = '\0'; + return length; + } }; // -- @@ -126,7 +133,7 @@ StringStorage::~StringStorage() { } StringStorage *StringStorage::create(void *user_context, const SystemMemoryAllocatorFns &system_allocator) { - halide_debug_assert(user_context, system_allocator.allocate != nullptr); + halide_abort_if_false(user_context, system_allocator.allocate != nullptr); StringStorage *result = reinterpret_cast( system_allocator.allocate(user_context, sizeof(StringStorage))); @@ -140,10 +147,10 @@ StringStorage *StringStorage::create(void *user_context, const SystemMemoryAlloc } void StringStorage::destroy(void *user_context, 
StringStorage *instance) { - halide_debug_assert(user_context, instance != nullptr); + halide_abort_if_false(user_context, instance != nullptr); const SystemMemoryAllocatorFns &system_allocator = instance->current_allocator(); instance->destroy(user_context); - halide_debug_assert(user_context, system_allocator.deallocate != nullptr); + halide_abort_if_false(user_context, system_allocator.deallocate != nullptr); system_allocator.deallocate(user_context, instance); } @@ -257,9 +264,11 @@ void StringStorage::prepend(void *user_context, char ch) { } void StringStorage::terminate(void *user_context, size_t length) { - if (contents.data() && (length < contents.size())) { + if (contents.is_valid(length)) { char *end_ptr = static_cast<char *>(contents[length]); (*end_ptr) = '\0'; + } else { + halide_error(user_context, "StringStorage: Failed to terminate string! Out of bounds!\n"); } } diff --git a/src/runtime/internal/string_table.h b/src/runtime/internal/string_table.h index fdc9e52e84ba..29635e4a52ec 100644 --- a/src/runtime/internal/string_table.h +++ b/src/runtime/internal/string_table.h @@ -1,7 +1,7 @@ #ifndef HALIDE_RUNTIME_STRING_TABLE_H #define HALIDE_RUNTIME_STRING_TABLE_H -#include "HalideRuntime.h" +#include "../HalideRuntime.h" #include "block_storage.h" #include "pointer_table.h" #include "string_storage.h" diff --git a/src/runtime/mini_vulkan.h b/src/runtime/mini_vulkan.h new file mode 100644 index 000000000000..184282f9a878 --- /dev/null +++ b/src/runtime/mini_vulkan.h @@ -0,0 +1,6078 @@ +#ifndef HALIDE_MINI_VULKAN_H +#define HALIDE_MINI_VULKAN_H + +/* +** Copyright (c) 2014-2017 The Khronos Group Inc. +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + +// Pickup integer types +#include "runtime_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/* +*************************************************************************************************** +* Platform-specific directives and type declarations +*************************************************************************************************** +*/ + +/* Platform-specific calling convention macros. + * + * Platforms should define these so that Vulkan clients call Vulkan commands + * with the same calling conventions that the Vulkan implementation expects. + * + * VKAPI_ATTR - Placed before the return type in function declarations. + * Useful for C++11 and GCC/Clang-style function attribute syntax. + * VKAPI_CALL - Placed after the return type in function declarations. + * Useful for MSVC-style calling convention syntax. + * VKAPI_PTR - Placed between the '(' and '*' in function pointer types.
+ * + * Function declaration: VKAPI_ATTR void VKAPI_CALL vkCommand(void); + * Function pointer type: typedef void (VKAPI_PTR *PFN_vkCommand)(void); + */ +#if defined(_WIN32) +// On Windows, Vulkan commands use the stdcall convention +#define VKAPI_ATTR +#define VKAPI_CALL __stdcall +#define VKAPI_PTR VKAPI_CALL +#elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH < 7 +#error "Vulkan isn't supported for the 'armeabi' NDK ABI" +#elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH >= 7 && defined(__ARM_32BIT_STATE) +// On Android 32-bit ARM targets, Vulkan functions use the "hardfloat" +// calling convention, i.e. float parameters are passed in registers. This +// is true even if the rest of the application passes floats on the stack, +// as it does by default when compiling for the armeabi-v7a NDK ABI. +#define VKAPI_ATTR __attribute__((pcs("aapcs-vfp"))) +#define VKAPI_CALL +#define VKAPI_PTR VKAPI_ATTR +#else +// On other platforms, use the default calling convention +#define VKAPI_ATTR +#define VKAPI_CALL +#define VKAPI_PTR +#endif + +typedef uint32_t VkFlags; +typedef uint32_t VkBool32; +typedef uint64_t VkDeviceSize; +typedef uint32_t VkSampleMask; + +// Provided by VK_VERSION_1_0 +#define VK_API_VERSION_MAJOR(version) (((uint32_t)(version) >> 22) & 0x7FU) +#define VK_API_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3FFU) +#define VK_API_VERSION_PATCH(version) ((uint32_t)(version)&0xFFFU) +#define VK_MAKE_API_VERSION(variant, major, minor, patch) \ + ((((uint32_t)(variant)) << 29) | (((uint32_t)(major)) << 22) | (((uint32_t)(minor)) << 12) | ((uint32_t)(patch))) +#define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0) +#define VK_API_VERSION_1_1 VK_MAKE_API_VERSION(0, 1, 1, 0) +#define VK_API_VERSION_1_2 VK_MAKE_API_VERSION(0, 1, 2, 0) +#define VK_API_VERSION_1_3 VK_MAKE_API_VERSION(0, 1, 3, 0) + +#define VK_DEFINE_HANDLE(object) typedef struct object##_T *(object); +#define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef uint64_t object; + +VK_DEFINE_HANDLE(VkInstance) +VK_DEFINE_HANDLE(VkPhysicalDevice) +VK_DEFINE_HANDLE(VkDevice) +VK_DEFINE_HANDLE(VkQueue) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSemaphore) +VK_DEFINE_HANDLE(VkCommandBuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFence) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDeviceMemory) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImage) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkEvent) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkQueryPool) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBufferView) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkImageView) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShaderModule) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineCache) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineLayout) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkRenderPass) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipeline) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSetLayout) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSampler) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorPool) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorSet) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkFramebuffer) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkCommandPool) + +#define VK_LOD_CLAMP_NONE 1000.0f +#define VK_REMAINING_MIP_LEVELS (~0U) +#define VK_REMAINING_ARRAY_LAYERS (~0U) +#define VK_WHOLE_SIZE (~0ULL) +#define VK_ATTACHMENT_UNUSED (~0U) +#define VK_TRUE 1 +#define VK_FALSE 0 +#define VK_QUEUE_FAMILY_IGNORED (~0U) +#define VK_SUBPASS_EXTERNAL (~0U) +#define VK_MAX_PHYSICAL_DEVICE_NAME_SIZE 256 +#define VK_UUID_SIZE 16 +#define VK_MAX_MEMORY_TYPES 32 
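(Editor's aside, not part of the patch: the VK_MAKE_API_VERSION / VK_API_VERSION_* macros above pack a Vulkan version into one uint32_t as variant:3 | major:7 | minor:10 | patch:12 bits, which is why the decode macros mask with 0x7FU, 0x3FFU, and 0xFFFU. A minimal standalone sketch of the round trip follows; the macro bodies are copied verbatim from the header above, and only the main() driver is illustrative.)

    /* Round-trip sketch for the version-packing macros defined above.
     * Macro bodies are quoted verbatim from mini_vulkan.h; the driver
     * below is illustrative only. */
    #include <stdint.h>
    #include <stdio.h>

    #define VK_MAKE_API_VERSION(variant, major, minor, patch) \
        ((((uint32_t)(variant)) << 29) | (((uint32_t)(major)) << 22) | (((uint32_t)(minor)) << 12) | ((uint32_t)(patch)))
    #define VK_API_VERSION_MAJOR(version) (((uint32_t)(version) >> 22) & 0x7FU)
    #define VK_API_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3FFU)
    #define VK_API_VERSION_PATCH(version) ((uint32_t)(version)&0xFFFU)

    int main(void) {
        uint32_t v = VK_MAKE_API_VERSION(0, 1, 3, 0); /* Vulkan 1.3 */
        /* (1 << 22) | (3 << 12) == 0x403000, so this prints "1.3.0 (0x403000)" */
        printf("%u.%u.%u (0x%x)\n",
               VK_API_VERSION_MAJOR(v),
               VK_API_VERSION_MINOR(v),
               VK_API_VERSION_PATCH(v),
               v);
        return 0;
    }

Because the variant bits sit at the top and each field is fixed-width, the plain VK_API_VERSION_1_x constants compare monotonically as integers, which is what version checks against these values rely on.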
+#define VK_MAX_MEMORY_HEAPS 16 +#define VK_MAX_EXTENSION_NAME_SIZE 256 +#define VK_MAX_DESCRIPTION_SIZE 256 + +typedef enum VkPipelineCacheHeaderVersion { + VK_PIPELINE_CACHE_HEADER_VERSION_ONE = 1, + VK_PIPELINE_CACHE_HEADER_VERSION_BEGIN_RANGE = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, + VK_PIPELINE_CACHE_HEADER_VERSION_END_RANGE = VK_PIPELINE_CACHE_HEADER_VERSION_ONE, + VK_PIPELINE_CACHE_HEADER_VERSION_RANGE_SIZE = (VK_PIPELINE_CACHE_HEADER_VERSION_ONE - VK_PIPELINE_CACHE_HEADER_VERSION_ONE + 1), // NOLINT: misc-redundant-expression + VK_PIPELINE_CACHE_HEADER_VERSION_MAX_ENUM = 0x7FFFFFFF +} VkPipelineCacheHeaderVersion; + +typedef enum VkResult { + VK_SUCCESS = 0, + VK_NOT_READY = 1, + VK_TIMEOUT = 2, + VK_EVENT_SET = 3, + VK_EVENT_RESET = 4, + VK_INCOMPLETE = 5, + VK_ERROR_OUT_OF_HOST_MEMORY = -1, + VK_ERROR_OUT_OF_DEVICE_MEMORY = -2, + VK_ERROR_INITIALIZATION_FAILED = -3, + VK_ERROR_DEVICE_LOST = -4, + VK_ERROR_MEMORY_MAP_FAILED = -5, + VK_ERROR_LAYER_NOT_PRESENT = -6, + VK_ERROR_EXTENSION_NOT_PRESENT = -7, + VK_ERROR_FEATURE_NOT_PRESENT = -8, + VK_ERROR_INCOMPATIBLE_DRIVER = -9, + VK_ERROR_TOO_MANY_OBJECTS = -10, + VK_ERROR_FORMAT_NOT_SUPPORTED = -11, + VK_ERROR_FRAGMENTED_POOL = -12, + VK_ERROR_SURFACE_LOST_KHR = -1000000000, + VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000000001, + VK_SUBOPTIMAL_KHR = 1000001003, + VK_ERROR_OUT_OF_DATE_KHR = -1000001004, + VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001, + VK_ERROR_VALIDATION_FAILED_EXT = -1000011001, + VK_ERROR_INVALID_SHADER_NV = -1000012000, + VK_ERROR_OUT_OF_POOL_MEMORY_KHR = -1000069000, + VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR = -1000072003, + VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL, + VK_RESULT_END_RANGE = VK_INCOMPLETE, + VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FRAGMENTED_POOL + 1), // NOLINT: misc-redundant-expression + VK_RESULT_MAX_ENUM = 0x7FFFFFFF +} VkResult; + +typedef enum VkStructureType { + VK_STRUCTURE_TYPE_APPLICATION_INFO = 0, + VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO = 1, + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO = 2, + VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO = 3, + VK_STRUCTURE_TYPE_SUBMIT_INFO = 4, + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO = 5, + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE = 6, + VK_STRUCTURE_TYPE_BIND_SPARSE_INFO = 7, + VK_STRUCTURE_TYPE_FENCE_CREATE_INFO = 8, + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO = 9, + VK_STRUCTURE_TYPE_EVENT_CREATE_INFO = 10, + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO = 11, + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO = 12, + VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO = 13, + VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO = 14, + VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO = 15, + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO = 16, + VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO = 17, + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO = 18, + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO = 19, + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO = 20, + VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO = 21, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO = 22, + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO = 23, + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO = 24, + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO = 25, + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO = 26, + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO = 27, + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO = 28, + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO = 29, + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO = 30, + 
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO = 31, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO = 32, + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO = 33, + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO = 34, + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET = 35, + VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET = 36, + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO = 37, + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO = 38, + VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO = 39, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO = 40, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO = 41, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO = 42, + VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO = 43, + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER = 44, + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER = 45, + VK_STRUCTURE_TYPE_MEMORY_BARRIER = 46, + VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO = 47, + VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO = 48, + VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR = 1000001000, + VK_STRUCTURE_TYPE_PRESENT_INFO_KHR = 1000001001, + VK_STRUCTURE_TYPE_DISPLAY_MODE_CREATE_INFO_KHR = 1000002000, + VK_STRUCTURE_TYPE_DISPLAY_SURFACE_CREATE_INFO_KHR = 1000002001, + VK_STRUCTURE_TYPE_DISPLAY_PRESENT_INFO_KHR = 1000003000, + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR = 1000004000, + VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR = 1000005000, + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR = 1000006000, + VK_STRUCTURE_TYPE_MIR_SURFACE_CREATE_INFO_KHR = 1000007000, + VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR = 1000008000, + VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR = 1000009000, + VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT = 1000011000, + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD = 1000018000, + VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT = 1000022000, + VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_TAG_INFO_EXT = 1000022001, + VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT = 1000022002, + VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV = 1000026000, + VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_BUFFER_CREATE_INFO_NV = 1000026001, + VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV = 1000026002, + VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD = 1000041000, + VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHX = 1000053000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHX = 1000053001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHX = 1000053002, + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV = 1000056000, + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_NV = 1000056001, + VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_NV = 1000057000, + VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_NV = 1000057001, + VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_NV = 1000058000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR = 1000059000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR = 1000059001, + VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2_KHR = 1000059002, + VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR = 1000059003, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR = 1000059004, + VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2_KHR = 1000059005, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2_KHR = 1000059006, + VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2_KHR = 1000059007, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SPARSE_IMAGE_FORMAT_INFO_2_KHR = 1000059008, + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHX = 1000060000, + 
VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO_KHX = 1000060001, + VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHX = 1000060002, + VK_STRUCTURE_TYPE_DEVICE_GROUP_RENDER_PASS_BEGIN_INFO_KHX = 1000060003, + VK_STRUCTURE_TYPE_DEVICE_GROUP_COMMAND_BUFFER_BEGIN_INFO_KHX = 1000060004, + VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO_KHX = 1000060005, + VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHX = 1000060006, + VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHX = 1000060010, + VK_STRUCTURE_TYPE_DEVICE_GROUP_PRESENT_CAPABILITIES_KHX = 1000060007, + VK_STRUCTURE_TYPE_IMAGE_SWAPCHAIN_CREATE_INFO_KHX = 1000060008, + VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHX = 1000060009, + VK_STRUCTURE_TYPE_DEVICE_GROUP_PRESENT_INFO_KHX = 1000060011, + VK_STRUCTURE_TYPE_DEVICE_GROUP_SWAPCHAIN_CREATE_INFO_KHX = 1000060012, + VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 1000061000, + VK_STRUCTURE_TYPE_VI_SURFACE_CREATE_INFO_NN = 1000062000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GROUP_PROPERTIES_KHX = 1000070000, + VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO_KHX = 1000070001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHR = 1000071000, + VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR = 1000071001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO_KHR = 1000071002, + VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES_KHR = 1000071003, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR = 1000071004, + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR = 1000072000, + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR = 1000072001, + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR = 1000072002, + VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR = 1000073000, + VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR = 1000073001, + VK_STRUCTURE_TYPE_MEMORY_WIN32_HANDLE_PROPERTIES_KHR = 1000073002, + VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR = 1000073003, + VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR = 1000074000, + VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR = 1000074001, + VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR = 1000074002, + VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_KHR = 1000075000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_SEMAPHORE_INFO_KHR = 1000076000, + VK_STRUCTURE_TYPE_EXTERNAL_SEMAPHORE_PROPERTIES_KHR = 1000076001, + VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR = 1000077000, + VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR = 1000078000, + VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR = 1000078001, + VK_STRUCTURE_TYPE_D3D12_FENCE_SUBMIT_INFO_KHR = 1000078002, + VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR = 1000078003, + VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR = 1000079000, + VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR = 1000079001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR = 1000080000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR = 1000083000, + VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR = 1000084000, + VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR = 1000085000, + VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX = 1000086000, + VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX = 1000086001, + VK_STRUCTURE_TYPE_CMD_PROCESS_COMMANDS_INFO_NVX = 1000086002, + VK_STRUCTURE_TYPE_CMD_RESERVE_SPACE_FOR_COMMANDS_INFO_NVX = 1000086003, + VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_LIMITS_NVX = 1000086004, + VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_FEATURES_NVX = 1000086005, + 
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_W_SCALING_STATE_CREATE_INFO_NV = 1000087000, + VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT = 1000090000, + VK_STRUCTURE_TYPE_DISPLAY_POWER_INFO_EXT = 1000091000, + VK_STRUCTURE_TYPE_DEVICE_EVENT_INFO_EXT = 1000091001, + VK_STRUCTURE_TYPE_DISPLAY_EVENT_INFO_EXT = 1000091002, + VK_STRUCTURE_TYPE_SWAPCHAIN_COUNTER_CREATE_INFO_EXT = 1000091003, + VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE = 1000092000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PER_VIEW_ATTRIBUTES_PROPERTIES_NVX = 1000097000, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV = 1000098000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT = 1000099000, + VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT = 1000099001, + VK_STRUCTURE_TYPE_HDR_METADATA_EXT = 1000105000, + VK_STRUCTURE_TYPE_SHARED_PRESENT_SURFACE_CAPABILITIES_KHR = 1000111000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO_KHR = 1000112000, + VK_STRUCTURE_TYPE_EXTERNAL_FENCE_PROPERTIES_KHR = 1000112001, + VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO_KHR = 1000113000, + VK_STRUCTURE_TYPE_IMPORT_FENCE_WIN32_HANDLE_INFO_KHR = 1000114000, + VK_STRUCTURE_TYPE_EXPORT_FENCE_WIN32_HANDLE_INFO_KHR = 1000114001, + VK_STRUCTURE_TYPE_FENCE_GET_WIN32_HANDLE_INFO_KHR = 1000114002, + VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR = 1000115000, + VK_STRUCTURE_TYPE_FENCE_GET_FD_INFO_KHR = 1000115001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR = 1000119000, + VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR = 1000119001, + VK_STRUCTURE_TYPE_SURFACE_FORMAT_2_KHR = 1000119002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR = 1000120000, + VK_STRUCTURE_TYPE_IOS_SURFACE_CREATE_INFO_MVK = 1000122000, + VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK = 1000123000, + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS_KHR = 1000127000, + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR = 1000127001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT = 1000130000, + VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT = 1000130001, + VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT = 1000143000, + VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT = 1000143001, + VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT = 1000143002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT = 1000143003, + VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT = 1000143004, + VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2_KHR = 1000146000, + VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2_KHR = 1000146001, + VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2_KHR = 1000146002, + VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR = 1000146003, + VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2_KHR = 1000146004, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_FEATURES_EXT = 1000148000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_PROPERTIES_EXT = 1000148001, + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_ADVANCED_STATE_CREATE_INFO_EXT = 1000148002, + VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_TO_COLOR_STATE_CREATE_INFO_NV = 1000149000, + VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_MODULATION_STATE_CREATE_INFO_NV = 1000152000, + VK_STRUCTURE_TYPE_VALIDATION_CACHE_CREATE_INFO_EXT = 1000160000, + VK_STRUCTURE_TYPE_SHADER_MODULE_VALIDATION_CACHE_CREATE_INFO_EXT = 1000160001, + VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO, + VK_STRUCTURE_TYPE_END_RANGE = 
VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO, + VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1), // NOLINT: misc-redundant-expression + VK_STRUCTURE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkStructureType; + +typedef enum VkSystemAllocationScope { + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND = 0, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT = 1, + VK_SYSTEM_ALLOCATION_SCOPE_CACHE = 2, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE = 3, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE = 4, + VK_SYSTEM_ALLOCATION_SCOPE_BEGIN_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_COMMAND, + VK_SYSTEM_ALLOCATION_SCOPE_END_RANGE = VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE, + VK_SYSTEM_ALLOCATION_SCOPE_RANGE_SIZE = (VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND + 1), // NOLINT: misc-redundant-expression + VK_SYSTEM_ALLOCATION_SCOPE_MAX_ENUM = 0x7FFFFFFF +} VkSystemAllocationScope; + +typedef enum VkInternalAllocationType { + VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE = 0, + VK_INTERNAL_ALLOCATION_TYPE_BEGIN_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, + VK_INTERNAL_ALLOCATION_TYPE_END_RANGE = VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE, + VK_INTERNAL_ALLOCATION_TYPE_RANGE_SIZE = (VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE - VK_INTERNAL_ALLOCATION_TYPE_EXECUTABLE + 1), // NOLINT: misc-redundant-expression + VK_INTERNAL_ALLOCATION_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkInternalAllocationType; + +typedef enum VkFormat { + VK_FORMAT_UNDEFINED = 0, + VK_FORMAT_R4G4_UNORM_PACK8 = 1, + VK_FORMAT_R4G4B4A4_UNORM_PACK16 = 2, + VK_FORMAT_B4G4R4A4_UNORM_PACK16 = 3, + VK_FORMAT_R5G6B5_UNORM_PACK16 = 4, + VK_FORMAT_B5G6R5_UNORM_PACK16 = 5, + VK_FORMAT_R5G5B5A1_UNORM_PACK16 = 6, + VK_FORMAT_B5G5R5A1_UNORM_PACK16 = 7, + VK_FORMAT_A1R5G5B5_UNORM_PACK16 = 8, + VK_FORMAT_R8_UNORM = 9, + VK_FORMAT_R8_SNORM = 10, + VK_FORMAT_R8_USCALED = 11, + VK_FORMAT_R8_SSCALED = 12, + VK_FORMAT_R8_UINT = 13, + VK_FORMAT_R8_SINT = 14, + VK_FORMAT_R8_SRGB = 15, + VK_FORMAT_R8G8_UNORM = 16, + VK_FORMAT_R8G8_SNORM = 17, + VK_FORMAT_R8G8_USCALED = 18, + VK_FORMAT_R8G8_SSCALED = 19, + VK_FORMAT_R8G8_UINT = 20, + VK_FORMAT_R8G8_SINT = 21, + VK_FORMAT_R8G8_SRGB = 22, + VK_FORMAT_R8G8B8_UNORM = 23, + VK_FORMAT_R8G8B8_SNORM = 24, + VK_FORMAT_R8G8B8_USCALED = 25, + VK_FORMAT_R8G8B8_SSCALED = 26, + VK_FORMAT_R8G8B8_UINT = 27, + VK_FORMAT_R8G8B8_SINT = 28, + VK_FORMAT_R8G8B8_SRGB = 29, + VK_FORMAT_B8G8R8_UNORM = 30, + VK_FORMAT_B8G8R8_SNORM = 31, + VK_FORMAT_B8G8R8_USCALED = 32, + VK_FORMAT_B8G8R8_SSCALED = 33, + VK_FORMAT_B8G8R8_UINT = 34, + VK_FORMAT_B8G8R8_SINT = 35, + VK_FORMAT_B8G8R8_SRGB = 36, + VK_FORMAT_R8G8B8A8_UNORM = 37, + VK_FORMAT_R8G8B8A8_SNORM = 38, + VK_FORMAT_R8G8B8A8_USCALED = 39, + VK_FORMAT_R8G8B8A8_SSCALED = 40, + VK_FORMAT_R8G8B8A8_UINT = 41, + VK_FORMAT_R8G8B8A8_SINT = 42, + VK_FORMAT_R8G8B8A8_SRGB = 43, + VK_FORMAT_B8G8R8A8_UNORM = 44, + VK_FORMAT_B8G8R8A8_SNORM = 45, + VK_FORMAT_B8G8R8A8_USCALED = 46, + VK_FORMAT_B8G8R8A8_SSCALED = 47, + VK_FORMAT_B8G8R8A8_UINT = 48, + VK_FORMAT_B8G8R8A8_SINT = 49, + VK_FORMAT_B8G8R8A8_SRGB = 50, + VK_FORMAT_A8B8G8R8_UNORM_PACK32 = 51, + VK_FORMAT_A8B8G8R8_SNORM_PACK32 = 52, + VK_FORMAT_A8B8G8R8_USCALED_PACK32 = 53, + VK_FORMAT_A8B8G8R8_SSCALED_PACK32 = 54, + VK_FORMAT_A8B8G8R8_UINT_PACK32 = 55, + VK_FORMAT_A8B8G8R8_SINT_PACK32 = 56, + VK_FORMAT_A8B8G8R8_SRGB_PACK32 = 57, + VK_FORMAT_A2R10G10B10_UNORM_PACK32 = 58, + VK_FORMAT_A2R10G10B10_SNORM_PACK32 = 59, + VK_FORMAT_A2R10G10B10_USCALED_PACK32 = 60, + VK_FORMAT_A2R10G10B10_SSCALED_PACK32 = 61, + VK_FORMAT_A2R10G10B10_UINT_PACK32 
= 62, + VK_FORMAT_A2R10G10B10_SINT_PACK32 = 63, + VK_FORMAT_A2B10G10R10_UNORM_PACK32 = 64, + VK_FORMAT_A2B10G10R10_SNORM_PACK32 = 65, + VK_FORMAT_A2B10G10R10_USCALED_PACK32 = 66, + VK_FORMAT_A2B10G10R10_SSCALED_PACK32 = 67, + VK_FORMAT_A2B10G10R10_UINT_PACK32 = 68, + VK_FORMAT_A2B10G10R10_SINT_PACK32 = 69, + VK_FORMAT_R16_UNORM = 70, + VK_FORMAT_R16_SNORM = 71, + VK_FORMAT_R16_USCALED = 72, + VK_FORMAT_R16_SSCALED = 73, + VK_FORMAT_R16_UINT = 74, + VK_FORMAT_R16_SINT = 75, + VK_FORMAT_R16_SFLOAT = 76, + VK_FORMAT_R16G16_UNORM = 77, + VK_FORMAT_R16G16_SNORM = 78, + VK_FORMAT_R16G16_USCALED = 79, + VK_FORMAT_R16G16_SSCALED = 80, + VK_FORMAT_R16G16_UINT = 81, + VK_FORMAT_R16G16_SINT = 82, + VK_FORMAT_R16G16_SFLOAT = 83, + VK_FORMAT_R16G16B16_UNORM = 84, + VK_FORMAT_R16G16B16_SNORM = 85, + VK_FORMAT_R16G16B16_USCALED = 86, + VK_FORMAT_R16G16B16_SSCALED = 87, + VK_FORMAT_R16G16B16_UINT = 88, + VK_FORMAT_R16G16B16_SINT = 89, + VK_FORMAT_R16G16B16_SFLOAT = 90, + VK_FORMAT_R16G16B16A16_UNORM = 91, + VK_FORMAT_R16G16B16A16_SNORM = 92, + VK_FORMAT_R16G16B16A16_USCALED = 93, + VK_FORMAT_R16G16B16A16_SSCALED = 94, + VK_FORMAT_R16G16B16A16_UINT = 95, + VK_FORMAT_R16G16B16A16_SINT = 96, + VK_FORMAT_R16G16B16A16_SFLOAT = 97, + VK_FORMAT_R32_UINT = 98, + VK_FORMAT_R32_SINT = 99, + VK_FORMAT_R32_SFLOAT = 100, + VK_FORMAT_R32G32_UINT = 101, + VK_FORMAT_R32G32_SINT = 102, + VK_FORMAT_R32G32_SFLOAT = 103, + VK_FORMAT_R32G32B32_UINT = 104, + VK_FORMAT_R32G32B32_SINT = 105, + VK_FORMAT_R32G32B32_SFLOAT = 106, + VK_FORMAT_R32G32B32A32_UINT = 107, + VK_FORMAT_R32G32B32A32_SINT = 108, + VK_FORMAT_R32G32B32A32_SFLOAT = 109, + VK_FORMAT_R64_UINT = 110, + VK_FORMAT_R64_SINT = 111, + VK_FORMAT_R64_SFLOAT = 112, + VK_FORMAT_R64G64_UINT = 113, + VK_FORMAT_R64G64_SINT = 114, + VK_FORMAT_R64G64_SFLOAT = 115, + VK_FORMAT_R64G64B64_UINT = 116, + VK_FORMAT_R64G64B64_SINT = 117, + VK_FORMAT_R64G64B64_SFLOAT = 118, + VK_FORMAT_R64G64B64A64_UINT = 119, + VK_FORMAT_R64G64B64A64_SINT = 120, + VK_FORMAT_R64G64B64A64_SFLOAT = 121, + VK_FORMAT_B10G11R11_UFLOAT_PACK32 = 122, + VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 = 123, + VK_FORMAT_D16_UNORM = 124, + VK_FORMAT_X8_D24_UNORM_PACK32 = 125, + VK_FORMAT_D32_SFLOAT = 126, + VK_FORMAT_S8_UINT = 127, + VK_FORMAT_D16_UNORM_S8_UINT = 128, + VK_FORMAT_D24_UNORM_S8_UINT = 129, + VK_FORMAT_D32_SFLOAT_S8_UINT = 130, + VK_FORMAT_BC1_RGB_UNORM_BLOCK = 131, + VK_FORMAT_BC1_RGB_SRGB_BLOCK = 132, + VK_FORMAT_BC1_RGBA_UNORM_BLOCK = 133, + VK_FORMAT_BC1_RGBA_SRGB_BLOCK = 134, + VK_FORMAT_BC2_UNORM_BLOCK = 135, + VK_FORMAT_BC2_SRGB_BLOCK = 136, + VK_FORMAT_BC3_UNORM_BLOCK = 137, + VK_FORMAT_BC3_SRGB_BLOCK = 138, + VK_FORMAT_BC4_UNORM_BLOCK = 139, + VK_FORMAT_BC4_SNORM_BLOCK = 140, + VK_FORMAT_BC5_UNORM_BLOCK = 141, + VK_FORMAT_BC5_SNORM_BLOCK = 142, + VK_FORMAT_BC6H_UFLOAT_BLOCK = 143, + VK_FORMAT_BC6H_SFLOAT_BLOCK = 144, + VK_FORMAT_BC7_UNORM_BLOCK = 145, + VK_FORMAT_BC7_SRGB_BLOCK = 146, + VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = 147, + VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = 148, + VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK = 149, + VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = 150, + VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK = 151, + VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = 152, + VK_FORMAT_EAC_R11_UNORM_BLOCK = 153, + VK_FORMAT_EAC_R11_SNORM_BLOCK = 154, + VK_FORMAT_EAC_R11G11_UNORM_BLOCK = 155, + VK_FORMAT_EAC_R11G11_SNORM_BLOCK = 156, + VK_FORMAT_ASTC_4x4_UNORM_BLOCK = 157, + VK_FORMAT_ASTC_4x4_SRGB_BLOCK = 158, + VK_FORMAT_ASTC_5x4_UNORM_BLOCK = 159, + VK_FORMAT_ASTC_5x4_SRGB_BLOCK = 160, + VK_FORMAT_ASTC_5x5_UNORM_BLOCK = 161, + 
VK_FORMAT_ASTC_5x5_SRGB_BLOCK = 162, + VK_FORMAT_ASTC_6x5_UNORM_BLOCK = 163, + VK_FORMAT_ASTC_6x5_SRGB_BLOCK = 164, + VK_FORMAT_ASTC_6x6_UNORM_BLOCK = 165, + VK_FORMAT_ASTC_6x6_SRGB_BLOCK = 166, + VK_FORMAT_ASTC_8x5_UNORM_BLOCK = 167, + VK_FORMAT_ASTC_8x5_SRGB_BLOCK = 168, + VK_FORMAT_ASTC_8x6_UNORM_BLOCK = 169, + VK_FORMAT_ASTC_8x6_SRGB_BLOCK = 170, + VK_FORMAT_ASTC_8x8_UNORM_BLOCK = 171, + VK_FORMAT_ASTC_8x8_SRGB_BLOCK = 172, + VK_FORMAT_ASTC_10x5_UNORM_BLOCK = 173, + VK_FORMAT_ASTC_10x5_SRGB_BLOCK = 174, + VK_FORMAT_ASTC_10x6_UNORM_BLOCK = 175, + VK_FORMAT_ASTC_10x6_SRGB_BLOCK = 176, + VK_FORMAT_ASTC_10x8_UNORM_BLOCK = 177, + VK_FORMAT_ASTC_10x8_SRGB_BLOCK = 178, + VK_FORMAT_ASTC_10x10_UNORM_BLOCK = 179, + VK_FORMAT_ASTC_10x10_SRGB_BLOCK = 180, + VK_FORMAT_ASTC_12x10_UNORM_BLOCK = 181, + VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, + VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, + VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, + VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG = 1000054000, + VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG = 1000054001, + VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG = 1000054002, + VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG = 1000054003, + VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG = 1000054004, + VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG = 1000054005, + VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG = 1000054006, + VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG = 1000054007, + VK_FORMAT_BEGIN_RANGE = VK_FORMAT_UNDEFINED, + VK_FORMAT_END_RANGE = VK_FORMAT_ASTC_12x12_SRGB_BLOCK, + VK_FORMAT_RANGE_SIZE = (VK_FORMAT_ASTC_12x12_SRGB_BLOCK - VK_FORMAT_UNDEFINED + 1), // NOLINT: misc-redundant-expression + VK_FORMAT_MAX_ENUM = 0x7FFFFFFF +} VkFormat; + +typedef enum VkImageType { + VK_IMAGE_TYPE_1D = 0, + VK_IMAGE_TYPE_2D = 1, + VK_IMAGE_TYPE_3D = 2, + VK_IMAGE_TYPE_BEGIN_RANGE = VK_IMAGE_TYPE_1D, + VK_IMAGE_TYPE_END_RANGE = VK_IMAGE_TYPE_3D, + VK_IMAGE_TYPE_RANGE_SIZE = (VK_IMAGE_TYPE_3D - VK_IMAGE_TYPE_1D + 1), // NOLINT: misc-redundant-expression + VK_IMAGE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkImageType; + +typedef enum VkImageTiling { + VK_IMAGE_TILING_OPTIMAL = 0, + VK_IMAGE_TILING_LINEAR = 1, + VK_IMAGE_TILING_BEGIN_RANGE = VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_TILING_END_RANGE = VK_IMAGE_TILING_LINEAR, + VK_IMAGE_TILING_RANGE_SIZE = (VK_IMAGE_TILING_LINEAR - VK_IMAGE_TILING_OPTIMAL + 1), // NOLINT: misc-redundant-expression + VK_IMAGE_TILING_MAX_ENUM = 0x7FFFFFFF +} VkImageTiling; + +typedef enum VkPhysicalDeviceType { + VK_PHYSICAL_DEVICE_TYPE_OTHER = 0, + VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU = 1, + VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU = 2, + VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU = 3, + VK_PHYSICAL_DEVICE_TYPE_CPU = 4, + VK_PHYSICAL_DEVICE_TYPE_BEGIN_RANGE = VK_PHYSICAL_DEVICE_TYPE_OTHER, + VK_PHYSICAL_DEVICE_TYPE_END_RANGE = VK_PHYSICAL_DEVICE_TYPE_CPU, + VK_PHYSICAL_DEVICE_TYPE_RANGE_SIZE = (VK_PHYSICAL_DEVICE_TYPE_CPU - VK_PHYSICAL_DEVICE_TYPE_OTHER + 1), // NOLINT: misc-redundant-expression + VK_PHYSICAL_DEVICE_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkPhysicalDeviceType; + +typedef enum VkQueryType { + VK_QUERY_TYPE_OCCLUSION = 0, + VK_QUERY_TYPE_PIPELINE_STATISTICS = 1, + VK_QUERY_TYPE_TIMESTAMP = 2, + VK_QUERY_TYPE_BEGIN_RANGE = VK_QUERY_TYPE_OCCLUSION, + VK_QUERY_TYPE_END_RANGE = VK_QUERY_TYPE_TIMESTAMP, + VK_QUERY_TYPE_RANGE_SIZE = (VK_QUERY_TYPE_TIMESTAMP - VK_QUERY_TYPE_OCCLUSION + 1), // NOLINT: misc-redundant-expression + VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkQueryType; + +typedef enum VkSharingMode { + VK_SHARING_MODE_EXCLUSIVE = 0, + VK_SHARING_MODE_CONCURRENT = 1, + VK_SHARING_MODE_BEGIN_RANGE = VK_SHARING_MODE_EXCLUSIVE, + 
VK_SHARING_MODE_END_RANGE = VK_SHARING_MODE_CONCURRENT, + VK_SHARING_MODE_RANGE_SIZE = (VK_SHARING_MODE_CONCURRENT - VK_SHARING_MODE_EXCLUSIVE + 1), // NOLINT: misc-redundant-expression + VK_SHARING_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSharingMode; + +typedef enum VkImageLayout { + VK_IMAGE_LAYOUT_UNDEFINED = 0, + VK_IMAGE_LAYOUT_GENERAL = 1, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL = 2, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL = 3, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL = 4, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL = 5, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = 6, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7, + VK_IMAGE_LAYOUT_PREINITIALIZED = 8, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002, + VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR = 1000111000, + VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED, + VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1), // NOLINT: misc-redundant-expression + VK_IMAGE_LAYOUT_MAX_ENUM = 0x7FFFFFFF +} VkImageLayout; + +typedef enum VkImageViewType { + VK_IMAGE_VIEW_TYPE_1D = 0, + VK_IMAGE_VIEW_TYPE_2D = 1, + VK_IMAGE_VIEW_TYPE_3D = 2, + VK_IMAGE_VIEW_TYPE_CUBE = 3, + VK_IMAGE_VIEW_TYPE_1D_ARRAY = 4, + VK_IMAGE_VIEW_TYPE_2D_ARRAY = 5, + VK_IMAGE_VIEW_TYPE_CUBE_ARRAY = 6, + VK_IMAGE_VIEW_TYPE_BEGIN_RANGE = VK_IMAGE_VIEW_TYPE_1D, + VK_IMAGE_VIEW_TYPE_END_RANGE = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, + VK_IMAGE_VIEW_TYPE_RANGE_SIZE = (VK_IMAGE_VIEW_TYPE_CUBE_ARRAY - VK_IMAGE_VIEW_TYPE_1D + 1), // NOLINT: misc-redundant-expression + VK_IMAGE_VIEW_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkImageViewType; + +typedef enum VkComponentSwizzle { + VK_COMPONENT_SWIZZLE_IDENTITY = 0, + VK_COMPONENT_SWIZZLE_ZERO = 1, + VK_COMPONENT_SWIZZLE_ONE = 2, + VK_COMPONENT_SWIZZLE_R = 3, + VK_COMPONENT_SWIZZLE_G = 4, + VK_COMPONENT_SWIZZLE_B = 5, + VK_COMPONENT_SWIZZLE_A = 6, + VK_COMPONENT_SWIZZLE_BEGIN_RANGE = VK_COMPONENT_SWIZZLE_IDENTITY, + VK_COMPONENT_SWIZZLE_END_RANGE = VK_COMPONENT_SWIZZLE_A, + VK_COMPONENT_SWIZZLE_RANGE_SIZE = (VK_COMPONENT_SWIZZLE_A - VK_COMPONENT_SWIZZLE_IDENTITY + 1), // NOLINT: misc-redundant-expression + VK_COMPONENT_SWIZZLE_MAX_ENUM = 0x7FFFFFFF +} VkComponentSwizzle; + +typedef enum VkVertexInputRate { + VK_VERTEX_INPUT_RATE_VERTEX = 0, + VK_VERTEX_INPUT_RATE_INSTANCE = 1, + VK_VERTEX_INPUT_RATE_BEGIN_RANGE = VK_VERTEX_INPUT_RATE_VERTEX, + VK_VERTEX_INPUT_RATE_END_RANGE = VK_VERTEX_INPUT_RATE_INSTANCE, + VK_VERTEX_INPUT_RATE_RANGE_SIZE = (VK_VERTEX_INPUT_RATE_INSTANCE - VK_VERTEX_INPUT_RATE_VERTEX + 1), // NOLINT: misc-redundant-expression + VK_VERTEX_INPUT_RATE_MAX_ENUM = 0x7FFFFFFF +} VkVertexInputRate; + +typedef enum VkPrimitiveTopology { + VK_PRIMITIVE_TOPOLOGY_POINT_LIST = 0, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST = 1, + VK_PRIMITIVE_TOPOLOGY_LINE_STRIP = 2, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST = 3, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP = 4, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN = 5, + VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY = 6, + VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY = 7, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY = 8, + VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY = 9, + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST = 10, + VK_PRIMITIVE_TOPOLOGY_BEGIN_RANGE = VK_PRIMITIVE_TOPOLOGY_POINT_LIST, + VK_PRIMITIVE_TOPOLOGY_END_RANGE = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + VK_PRIMITIVE_TOPOLOGY_RANGE_SIZE = (VK_PRIMITIVE_TOPOLOGY_PATCH_LIST - VK_PRIMITIVE_TOPOLOGY_POINT_LIST + 1), // NOLINT: misc-redundant-expression + 
VK_PRIMITIVE_TOPOLOGY_MAX_ENUM = 0x7FFFFFFF +} VkPrimitiveTopology; + +typedef enum VkPolygonMode { + VK_POLYGON_MODE_FILL = 0, + VK_POLYGON_MODE_LINE = 1, + VK_POLYGON_MODE_POINT = 2, + VK_POLYGON_MODE_FILL_RECTANGLE_NV = 1000153000, + VK_POLYGON_MODE_BEGIN_RANGE = VK_POLYGON_MODE_FILL, + VK_POLYGON_MODE_END_RANGE = VK_POLYGON_MODE_POINT, + VK_POLYGON_MODE_RANGE_SIZE = (VK_POLYGON_MODE_POINT - VK_POLYGON_MODE_FILL + 1), // NOLINT: misc-redundant-expression + VK_POLYGON_MODE_MAX_ENUM = 0x7FFFFFFF +} VkPolygonMode; + +typedef enum VkFrontFace { + VK_FRONT_FACE_COUNTER_CLOCKWISE = 0, + VK_FRONT_FACE_CLOCKWISE = 1, + VK_FRONT_FACE_BEGIN_RANGE = VK_FRONT_FACE_COUNTER_CLOCKWISE, + VK_FRONT_FACE_END_RANGE = VK_FRONT_FACE_CLOCKWISE, + VK_FRONT_FACE_RANGE_SIZE = (VK_FRONT_FACE_CLOCKWISE - VK_FRONT_FACE_COUNTER_CLOCKWISE + 1), // NOLINT: misc-redundant-expression + VK_FRONT_FACE_MAX_ENUM = 0x7FFFFFFF +} VkFrontFace; + +typedef enum VkCompareOp { + VK_COMPARE_OP_NEVER = 0, + VK_COMPARE_OP_LESS = 1, + VK_COMPARE_OP_EQUAL = 2, + VK_COMPARE_OP_LESS_OR_EQUAL = 3, + VK_COMPARE_OP_GREATER = 4, + VK_COMPARE_OP_NOT_EQUAL = 5, + VK_COMPARE_OP_GREATER_OR_EQUAL = 6, + VK_COMPARE_OP_ALWAYS = 7, + VK_COMPARE_OP_BEGIN_RANGE = VK_COMPARE_OP_NEVER, + VK_COMPARE_OP_END_RANGE = VK_COMPARE_OP_ALWAYS, + VK_COMPARE_OP_RANGE_SIZE = (VK_COMPARE_OP_ALWAYS - VK_COMPARE_OP_NEVER + 1), // NOLINT: misc-redundant-expression + VK_COMPARE_OP_MAX_ENUM = 0x7FFFFFFF +} VkCompareOp; + +typedef enum VkStencilOp { + VK_STENCIL_OP_KEEP = 0, + VK_STENCIL_OP_ZERO = 1, + VK_STENCIL_OP_REPLACE = 2, + VK_STENCIL_OP_INCREMENT_AND_CLAMP = 3, + VK_STENCIL_OP_DECREMENT_AND_CLAMP = 4, + VK_STENCIL_OP_INVERT = 5, + VK_STENCIL_OP_INCREMENT_AND_WRAP = 6, + VK_STENCIL_OP_DECREMENT_AND_WRAP = 7, + VK_STENCIL_OP_BEGIN_RANGE = VK_STENCIL_OP_KEEP, + VK_STENCIL_OP_END_RANGE = VK_STENCIL_OP_DECREMENT_AND_WRAP, + VK_STENCIL_OP_RANGE_SIZE = (VK_STENCIL_OP_DECREMENT_AND_WRAP - VK_STENCIL_OP_KEEP + 1), // NOLINT: misc-redundant-expression + VK_STENCIL_OP_MAX_ENUM = 0x7FFFFFFF +} VkStencilOp; + +typedef enum VkLogicOp { + VK_LOGIC_OP_CLEAR = 0, + VK_LOGIC_OP_AND = 1, + VK_LOGIC_OP_AND_REVERSE = 2, + VK_LOGIC_OP_COPY = 3, + VK_LOGIC_OP_AND_INVERTED = 4, + VK_LOGIC_OP_NO_OP = 5, + VK_LOGIC_OP_XOR = 6, + VK_LOGIC_OP_OR = 7, + VK_LOGIC_OP_NOR = 8, + VK_LOGIC_OP_EQUIVALENT = 9, + VK_LOGIC_OP_INVERT = 10, + VK_LOGIC_OP_OR_REVERSE = 11, + VK_LOGIC_OP_COPY_INVERTED = 12, + VK_LOGIC_OP_OR_INVERTED = 13, + VK_LOGIC_OP_NAND = 14, + VK_LOGIC_OP_SET = 15, + VK_LOGIC_OP_BEGIN_RANGE = VK_LOGIC_OP_CLEAR, + VK_LOGIC_OP_END_RANGE = VK_LOGIC_OP_SET, + VK_LOGIC_OP_RANGE_SIZE = (VK_LOGIC_OP_SET - VK_LOGIC_OP_CLEAR + 1), // NOLINT: misc-redundant-expression + VK_LOGIC_OP_MAX_ENUM = 0x7FFFFFFF +} VkLogicOp; + +typedef enum VkBlendFactor { + VK_BLEND_FACTOR_ZERO = 0, + VK_BLEND_FACTOR_ONE = 1, + VK_BLEND_FACTOR_SRC_COLOR = 2, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR = 3, + VK_BLEND_FACTOR_DST_COLOR = 4, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR = 5, + VK_BLEND_FACTOR_SRC_ALPHA = 6, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA = 7, + VK_BLEND_FACTOR_DST_ALPHA = 8, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA = 9, + VK_BLEND_FACTOR_CONSTANT_COLOR = 10, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR = 11, + VK_BLEND_FACTOR_CONSTANT_ALPHA = 12, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA = 13, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE = 14, + VK_BLEND_FACTOR_SRC1_COLOR = 15, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR = 16, + VK_BLEND_FACTOR_SRC1_ALPHA = 17, + VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA = 18, + 
VK_BLEND_FACTOR_BEGIN_RANGE = VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_END_RANGE = VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, + VK_BLEND_FACTOR_RANGE_SIZE = (VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA - VK_BLEND_FACTOR_ZERO + 1), // NOLINT: misc-redundant-expression + VK_BLEND_FACTOR_MAX_ENUM = 0x7FFFFFFF +} VkBlendFactor; + +typedef enum VkBlendOp { + VK_BLEND_OP_ADD = 0, + VK_BLEND_OP_SUBTRACT = 1, + VK_BLEND_OP_REVERSE_SUBTRACT = 2, + VK_BLEND_OP_MIN = 3, + VK_BLEND_OP_MAX = 4, + VK_BLEND_OP_ZERO_EXT = 1000148000, + VK_BLEND_OP_SRC_EXT = 1000148001, + VK_BLEND_OP_DST_EXT = 1000148002, + VK_BLEND_OP_SRC_OVER_EXT = 1000148003, + VK_BLEND_OP_DST_OVER_EXT = 1000148004, + VK_BLEND_OP_SRC_IN_EXT = 1000148005, + VK_BLEND_OP_DST_IN_EXT = 1000148006, + VK_BLEND_OP_SRC_OUT_EXT = 1000148007, + VK_BLEND_OP_DST_OUT_EXT = 1000148008, + VK_BLEND_OP_SRC_ATOP_EXT = 1000148009, + VK_BLEND_OP_DST_ATOP_EXT = 1000148010, + VK_BLEND_OP_XOR_EXT = 1000148011, + VK_BLEND_OP_MULTIPLY_EXT = 1000148012, + VK_BLEND_OP_SCREEN_EXT = 1000148013, + VK_BLEND_OP_OVERLAY_EXT = 1000148014, + VK_BLEND_OP_DARKEN_EXT = 1000148015, + VK_BLEND_OP_LIGHTEN_EXT = 1000148016, + VK_BLEND_OP_COLORDODGE_EXT = 1000148017, + VK_BLEND_OP_COLORBURN_EXT = 1000148018, + VK_BLEND_OP_HARDLIGHT_EXT = 1000148019, + VK_BLEND_OP_SOFTLIGHT_EXT = 1000148020, + VK_BLEND_OP_DIFFERENCE_EXT = 1000148021, + VK_BLEND_OP_EXCLUSION_EXT = 1000148022, + VK_BLEND_OP_INVERT_EXT = 1000148023, + VK_BLEND_OP_INVERT_RGB_EXT = 1000148024, + VK_BLEND_OP_LINEARDODGE_EXT = 1000148025, + VK_BLEND_OP_LINEARBURN_EXT = 1000148026, + VK_BLEND_OP_VIVIDLIGHT_EXT = 1000148027, + VK_BLEND_OP_LINEARLIGHT_EXT = 1000148028, + VK_BLEND_OP_PINLIGHT_EXT = 1000148029, + VK_BLEND_OP_HARDMIX_EXT = 1000148030, + VK_BLEND_OP_HSL_HUE_EXT = 1000148031, + VK_BLEND_OP_HSL_SATURATION_EXT = 1000148032, + VK_BLEND_OP_HSL_COLOR_EXT = 1000148033, + VK_BLEND_OP_HSL_LUMINOSITY_EXT = 1000148034, + VK_BLEND_OP_PLUS_EXT = 1000148035, + VK_BLEND_OP_PLUS_CLAMPED_EXT = 1000148036, + VK_BLEND_OP_PLUS_CLAMPED_ALPHA_EXT = 1000148037, + VK_BLEND_OP_PLUS_DARKER_EXT = 1000148038, + VK_BLEND_OP_MINUS_EXT = 1000148039, + VK_BLEND_OP_MINUS_CLAMPED_EXT = 1000148040, + VK_BLEND_OP_CONTRAST_EXT = 1000148041, + VK_BLEND_OP_INVERT_OVG_EXT = 1000148042, + VK_BLEND_OP_RED_EXT = 1000148043, + VK_BLEND_OP_GREEN_EXT = 1000148044, + VK_BLEND_OP_BLUE_EXT = 1000148045, + VK_BLEND_OP_BEGIN_RANGE = VK_BLEND_OP_ADD, + VK_BLEND_OP_END_RANGE = VK_BLEND_OP_MAX, + VK_BLEND_OP_RANGE_SIZE = (VK_BLEND_OP_MAX - VK_BLEND_OP_ADD + 1), // NOLINT: misc-redundant-expression + VK_BLEND_OP_MAX_ENUM = 0x7FFFFFFF +} VkBlendOp; + +typedef enum VkDynamicState { + VK_DYNAMIC_STATE_VIEWPORT = 0, + VK_DYNAMIC_STATE_SCISSOR = 1, + VK_DYNAMIC_STATE_LINE_WIDTH = 2, + VK_DYNAMIC_STATE_DEPTH_BIAS = 3, + VK_DYNAMIC_STATE_BLEND_CONSTANTS = 4, + VK_DYNAMIC_STATE_DEPTH_BOUNDS = 5, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK = 6, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK = 7, + VK_DYNAMIC_STATE_STENCIL_REFERENCE = 8, + VK_DYNAMIC_STATE_VIEWPORT_W_SCALING_NV = 1000087000, + VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT = 1000099000, + VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT = 1000143000, + VK_DYNAMIC_STATE_BEGIN_RANGE = VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_END_RANGE = VK_DYNAMIC_STATE_STENCIL_REFERENCE, + VK_DYNAMIC_STATE_RANGE_SIZE = (VK_DYNAMIC_STATE_STENCIL_REFERENCE - VK_DYNAMIC_STATE_VIEWPORT + 1), // NOLINT: misc-redundant-expression + VK_DYNAMIC_STATE_MAX_ENUM = 0x7FFFFFFF +} VkDynamicState; + +typedef enum VkFilter { + VK_FILTER_NEAREST = 0, + VK_FILTER_LINEAR = 1, + 
VK_FILTER_CUBIC_IMG = 1000015000, + VK_FILTER_BEGIN_RANGE = VK_FILTER_NEAREST, + VK_FILTER_END_RANGE = VK_FILTER_LINEAR, + VK_FILTER_RANGE_SIZE = (VK_FILTER_LINEAR - VK_FILTER_NEAREST + 1), // NOLINT: misc-redundant-expression + VK_FILTER_MAX_ENUM = 0x7FFFFFFF +} VkFilter; + +typedef enum VkSamplerMipmapMode { + VK_SAMPLER_MIPMAP_MODE_NEAREST = 0, + VK_SAMPLER_MIPMAP_MODE_LINEAR = 1, + VK_SAMPLER_MIPMAP_MODE_BEGIN_RANGE = VK_SAMPLER_MIPMAP_MODE_NEAREST, + VK_SAMPLER_MIPMAP_MODE_END_RANGE = VK_SAMPLER_MIPMAP_MODE_LINEAR, + VK_SAMPLER_MIPMAP_MODE_RANGE_SIZE = (VK_SAMPLER_MIPMAP_MODE_LINEAR - VK_SAMPLER_MIPMAP_MODE_NEAREST + 1), // NOLINT: misc-redundant-expression + VK_SAMPLER_MIPMAP_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerMipmapMode; + +typedef enum VkSamplerAddressMode { + VK_SAMPLER_ADDRESS_MODE_REPEAT = 0, + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT = 1, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 2, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 4, + VK_SAMPLER_ADDRESS_MODE_BEGIN_RANGE = VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_END_RANGE = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_SAMPLER_ADDRESS_MODE_RANGE_SIZE = (VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER - VK_SAMPLER_ADDRESS_MODE_REPEAT + 1), // NOLINT: misc-redundant-expression + VK_SAMPLER_ADDRESS_MODE_MAX_ENUM = 0x7FFFFFFF +} VkSamplerAddressMode; + +typedef enum VkBorderColor { + VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK = 0, + VK_BORDER_COLOR_INT_TRANSPARENT_BLACK = 1, + VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK = 2, + VK_BORDER_COLOR_INT_OPAQUE_BLACK = 3, + VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE = 4, + VK_BORDER_COLOR_INT_OPAQUE_WHITE = 5, + VK_BORDER_COLOR_BEGIN_RANGE = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, + VK_BORDER_COLOR_END_RANGE = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + VK_BORDER_COLOR_RANGE_SIZE = (VK_BORDER_COLOR_INT_OPAQUE_WHITE - VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK + 1), // NOLINT: misc-redundant-expression + VK_BORDER_COLOR_MAX_ENUM = 0x7FFFFFFF +} VkBorderColor; + +typedef enum VkDescriptorType { + VK_DESCRIPTOR_TYPE_SAMPLER = 0, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER = 1, + VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE = 2, + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE = 3, + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER = 4, + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER = 5, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER = 6, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER = 7, + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = 8, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = 9, + VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = 10, + VK_DESCRIPTOR_TYPE_BEGIN_RANGE = VK_DESCRIPTOR_TYPE_SAMPLER, + VK_DESCRIPTOR_TYPE_END_RANGE = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, + VK_DESCRIPTOR_TYPE_RANGE_SIZE = (VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT - VK_DESCRIPTOR_TYPE_SAMPLER + 1), // NOLINT: misc-redundant-expression + VK_DESCRIPTOR_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkDescriptorType; + +typedef enum VkAttachmentLoadOp { + VK_ATTACHMENT_LOAD_OP_LOAD = 0, + VK_ATTACHMENT_LOAD_OP_CLEAR = 1, + VK_ATTACHMENT_LOAD_OP_DONT_CARE = 2, + VK_ATTACHMENT_LOAD_OP_BEGIN_RANGE = VK_ATTACHMENT_LOAD_OP_LOAD, + VK_ATTACHMENT_LOAD_OP_END_RANGE = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_ATTACHMENT_LOAD_OP_RANGE_SIZE = (VK_ATTACHMENT_LOAD_OP_DONT_CARE - VK_ATTACHMENT_LOAD_OP_LOAD + 1), // NOLINT: misc-redundant-expression + VK_ATTACHMENT_LOAD_OP_MAX_ENUM = 0x7FFFFFFF +} VkAttachmentLoadOp; + +typedef enum VkAttachmentStoreOp { + VK_ATTACHMENT_STORE_OP_STORE = 0, + VK_ATTACHMENT_STORE_OP_DONT_CARE = 1, + VK_ATTACHMENT_STORE_OP_BEGIN_RANGE = VK_ATTACHMENT_STORE_OP_STORE, + 
VK_ATTACHMENT_STORE_OP_END_RANGE = VK_ATTACHMENT_STORE_OP_DONT_CARE, + VK_ATTACHMENT_STORE_OP_RANGE_SIZE = (VK_ATTACHMENT_STORE_OP_DONT_CARE - VK_ATTACHMENT_STORE_OP_STORE + 1), // NOLINT: misc-redundant-expression + VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF +} VkAttachmentStoreOp; + +typedef enum VkPipelineBindPoint { + VK_PIPELINE_BIND_POINT_GRAPHICS = 0, + VK_PIPELINE_BIND_POINT_COMPUTE = 1, + VK_PIPELINE_BIND_POINT_BEGIN_RANGE = VK_PIPELINE_BIND_POINT_GRAPHICS, + VK_PIPELINE_BIND_POINT_END_RANGE = VK_PIPELINE_BIND_POINT_COMPUTE, + VK_PIPELINE_BIND_POINT_RANGE_SIZE = (VK_PIPELINE_BIND_POINT_COMPUTE - VK_PIPELINE_BIND_POINT_GRAPHICS + 1), // NOLINT: misc-redundant-expression + VK_PIPELINE_BIND_POINT_MAX_ENUM = 0x7FFFFFFF +} VkPipelineBindPoint; + +typedef enum VkCommandBufferLevel { + VK_COMMAND_BUFFER_LEVEL_PRIMARY = 0, + VK_COMMAND_BUFFER_LEVEL_SECONDARY = 1, + VK_COMMAND_BUFFER_LEVEL_BEGIN_RANGE = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + VK_COMMAND_BUFFER_LEVEL_END_RANGE = VK_COMMAND_BUFFER_LEVEL_SECONDARY, + VK_COMMAND_BUFFER_LEVEL_RANGE_SIZE = (VK_COMMAND_BUFFER_LEVEL_SECONDARY - VK_COMMAND_BUFFER_LEVEL_PRIMARY + 1), // NOLINT: misc-redundant-expression + VK_COMMAND_BUFFER_LEVEL_MAX_ENUM = 0x7FFFFFFF +} VkCommandBufferLevel; + +typedef enum VkIndexType { + VK_INDEX_TYPE_UINT16 = 0, + VK_INDEX_TYPE_UINT32 = 1, + VK_INDEX_TYPE_BEGIN_RANGE = VK_INDEX_TYPE_UINT16, + VK_INDEX_TYPE_END_RANGE = VK_INDEX_TYPE_UINT32, + VK_INDEX_TYPE_RANGE_SIZE = (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1), // NOLINT: misc-redundant-expression + VK_INDEX_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkIndexType; + +typedef enum VkSubpassContents { + VK_SUBPASS_CONTENTS_INLINE = 0, + VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS = 1, + VK_SUBPASS_CONTENTS_BEGIN_RANGE = VK_SUBPASS_CONTENTS_INLINE, + VK_SUBPASS_CONTENTS_END_RANGE = VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS, + VK_SUBPASS_CONTENTS_RANGE_SIZE = (VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS - VK_SUBPASS_CONTENTS_INLINE + 1), // NOLINT: misc-redundant-expression + VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF +} VkSubpassContents; + +typedef enum VkObjectType { + VK_OBJECT_TYPE_UNKNOWN = 0, + VK_OBJECT_TYPE_INSTANCE = 1, + VK_OBJECT_TYPE_PHYSICAL_DEVICE = 2, + VK_OBJECT_TYPE_DEVICE = 3, + VK_OBJECT_TYPE_QUEUE = 4, + VK_OBJECT_TYPE_SEMAPHORE = 5, + VK_OBJECT_TYPE_COMMAND_BUFFER = 6, + VK_OBJECT_TYPE_FENCE = 7, + VK_OBJECT_TYPE_DEVICE_MEMORY = 8, + VK_OBJECT_TYPE_BUFFER = 9, + VK_OBJECT_TYPE_IMAGE = 10, + VK_OBJECT_TYPE_EVENT = 11, + VK_OBJECT_TYPE_QUERY_POOL = 12, + VK_OBJECT_TYPE_BUFFER_VIEW = 13, + VK_OBJECT_TYPE_IMAGE_VIEW = 14, + VK_OBJECT_TYPE_SHADER_MODULE = 15, + VK_OBJECT_TYPE_PIPELINE_CACHE = 16, + VK_OBJECT_TYPE_PIPELINE_LAYOUT = 17, + VK_OBJECT_TYPE_RENDER_PASS = 18, + VK_OBJECT_TYPE_PIPELINE = 19, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT = 20, + VK_OBJECT_TYPE_SAMPLER = 21, + VK_OBJECT_TYPE_DESCRIPTOR_POOL = 22, + VK_OBJECT_TYPE_DESCRIPTOR_SET = 23, + VK_OBJECT_TYPE_FRAMEBUFFER = 24, + VK_OBJECT_TYPE_COMMAND_POOL = 25, + VK_OBJECT_TYPE_SURFACE_KHR = 1000000000, + VK_OBJECT_TYPE_SWAPCHAIN_KHR = 1000001000, + VK_OBJECT_TYPE_DISPLAY_KHR = 1000002000, + VK_OBJECT_TYPE_DISPLAY_MODE_KHR = 1000002001, + VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT = 1000011000, + VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = 1000085000, + VK_OBJECT_TYPE_OBJECT_TABLE_NVX = 1000086000, + VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX = 1000086001, + VK_OBJECT_TYPE_VALIDATION_CACHE_EXT = 1000160000, + VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_UNKNOWN, + VK_OBJECT_TYPE_END_RANGE = 
VK_OBJECT_TYPE_COMMAND_POOL, + VK_OBJECT_TYPE_RANGE_SIZE = (VK_OBJECT_TYPE_COMMAND_POOL - VK_OBJECT_TYPE_UNKNOWN + 1), // NOLINT: misc-redundant-expression + VK_OBJECT_TYPE_MAX_ENUM = 0x7FFFFFFF +} VkObjectType; + +typedef enum VkInstanceCreateFlagBits { + VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR = 0x00000001, + VK_INSTANCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkInstanceCreateFlagBits; +typedef VkFlags VkInstanceCreateFlags; + +typedef enum VkFormatFeatureFlagBits { + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT = 0x00000001, + VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT = 0x00000002, + VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT = 0x00000004, + VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000008, + VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT = 0x00000010, + VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT = 0x00000020, + VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT = 0x00000040, + VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT = 0x00000080, + VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT = 0x00000100, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000200, + VK_FORMAT_FEATURE_BLIT_SRC_BIT = 0x00000400, + VK_FORMAT_FEATURE_BLIT_DST_BIT = 0x00000800, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT = 0x00001000, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG = 0x00002000, + VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR = 0x00004000, + VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR = 0x00008000, + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT = 0x00010000, + VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkFormatFeatureFlagBits; +typedef VkFlags VkFormatFeatureFlags; + +typedef enum VkImageUsageFlagBits { + VK_IMAGE_USAGE_TRANSFER_SRC_BIT = 0x00000001, + VK_IMAGE_USAGE_TRANSFER_DST_BIT = 0x00000002, + VK_IMAGE_USAGE_SAMPLED_BIT = 0x00000004, + VK_IMAGE_USAGE_STORAGE_BIT = 0x00000008, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT = 0x00000010, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000020, + VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040, + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080, + VK_IMAGE_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkImageUsageFlagBits; +typedef VkFlags VkImageUsageFlags; + +typedef enum VkImageCreateFlagBits { + VK_IMAGE_CREATE_SPARSE_BINDING_BIT = 0x00000001, + VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002, + VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004, + VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008, + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010, + VK_IMAGE_CREATE_BIND_SFR_BIT_KHX = 0x00000040, + VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR = 0x00000020, + VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT = 0x00001000, + VK_IMAGE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkImageCreateFlagBits; +typedef VkFlags VkImageCreateFlags; + +typedef enum VkSampleCountFlagBits { + VK_SAMPLE_COUNT_1_BIT = 0x00000001, + VK_SAMPLE_COUNT_2_BIT = 0x00000002, + VK_SAMPLE_COUNT_4_BIT = 0x00000004, + VK_SAMPLE_COUNT_8_BIT = 0x00000008, + VK_SAMPLE_COUNT_16_BIT = 0x00000010, + VK_SAMPLE_COUNT_32_BIT = 0x00000020, + VK_SAMPLE_COUNT_64_BIT = 0x00000040, + VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkSampleCountFlagBits; +typedef VkFlags VkSampleCountFlags; + +typedef enum VkQueueFlagBits { + VK_QUEUE_GRAPHICS_BIT = 0x00000001, + VK_QUEUE_COMPUTE_BIT = 0x00000002, + VK_QUEUE_TRANSFER_BIT = 0x00000004, + VK_QUEUE_SPARSE_BINDING_BIT = 0x00000008, + VK_QUEUE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkQueueFlagBits; +typedef VkFlags VkQueueFlags; + +typedef enum VkMemoryPropertyFlagBits { + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT = 0x00000001, + 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT = 0x00000002,
+    VK_MEMORY_PROPERTY_HOST_COHERENT_BIT = 0x00000004,
+    VK_MEMORY_PROPERTY_HOST_CACHED_BIT = 0x00000008,
+    VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010,
+    VK_MEMORY_PROPERTY_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkMemoryPropertyFlagBits;
+typedef VkFlags VkMemoryPropertyFlags;
+
+typedef enum VkMemoryHeapFlagBits {
+    VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001,
+    VK_MEMORY_HEAP_MULTI_INSTANCE_BIT_KHX = 0x00000002,
+    VK_MEMORY_HEAP_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkMemoryHeapFlagBits;
+typedef VkFlags VkMemoryHeapFlags;
+typedef VkFlags VkDeviceCreateFlags;
+typedef VkFlags VkDeviceQueueCreateFlags;
+
+typedef enum VkPipelineStageFlagBits {
+    VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT = 0x00000001,
+    VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT = 0x00000002,
+    VK_PIPELINE_STAGE_VERTEX_INPUT_BIT = 0x00000004,
+    VK_PIPELINE_STAGE_VERTEX_SHADER_BIT = 0x00000008,
+    VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT = 0x00000010,
+    VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT = 0x00000020,
+    VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT = 0x00000040,
+    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT = 0x00000080,
+    VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT = 0x00000100,
+    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT = 0x00000200,
+    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT = 0x00000400,
+    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT = 0x00000800,
+    VK_PIPELINE_STAGE_TRANSFER_BIT = 0x00001000,
+    VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT = 0x00002000,
+    VK_PIPELINE_STAGE_HOST_BIT = 0x00004000,
+    VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x00008000,
+    VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x00010000,
+    VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX = 0x00020000,
+    VK_PIPELINE_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkPipelineStageFlagBits;
+typedef VkFlags VkPipelineStageFlags;
+typedef VkFlags VkMemoryMapFlags;
+
+typedef enum VkImageAspectFlagBits {
+    VK_IMAGE_ASPECT_COLOR_BIT = 0x00000001,
+    VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002,
+    VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004,
+    VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008,
+    VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkImageAspectFlagBits;
+typedef VkFlags VkImageAspectFlags;
+
+typedef enum VkSparseImageFormatFlagBits {
+    VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT = 0x00000001,
+    VK_SPARSE_IMAGE_FORMAT_ALIGNED_MIP_SIZE_BIT = 0x00000002,
+    VK_SPARSE_IMAGE_FORMAT_NONSTANDARD_BLOCK_SIZE_BIT = 0x00000004,
+    VK_SPARSE_IMAGE_FORMAT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSparseImageFormatFlagBits;
+typedef VkFlags VkSparseImageFormatFlags;
+
+typedef enum VkSparseMemoryBindFlagBits {
+    VK_SPARSE_MEMORY_BIND_METADATA_BIT = 0x00000001,
+    VK_SPARSE_MEMORY_BIND_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSparseMemoryBindFlagBits;
+typedef VkFlags VkSparseMemoryBindFlags;
+
+typedef enum VkFenceCreateFlagBits {
+    VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001,
+    VK_FENCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkFenceCreateFlagBits;
+typedef VkFlags VkFenceCreateFlags;
+typedef VkFlags VkSemaphoreCreateFlags;
+typedef VkFlags VkEventCreateFlags;
+typedef VkFlags VkQueryPoolCreateFlags;
+
+typedef enum VkQueryPipelineStatisticFlagBits {
+    VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT = 0x00000001,
+    VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT = 0x00000002,
+    VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT = 0x00000004,
+    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT = 0x00000008,
+    VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT = 0x00000010,
+    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT = 0x00000020,
+    VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT = 0x00000040,
+    VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT = 0x00000080,
+    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT = 0x00000100,
+    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT = 0x00000200,
+    VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT = 0x00000400,
+    VK_QUERY_PIPELINE_STATISTIC_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkQueryPipelineStatisticFlagBits;
+typedef VkFlags VkQueryPipelineStatisticFlags;
+
+typedef enum VkQueryResultFlagBits {
+    VK_QUERY_RESULT_64_BIT = 0x00000001,
+    VK_QUERY_RESULT_WAIT_BIT = 0x00000002,
+    VK_QUERY_RESULT_WITH_AVAILABILITY_BIT = 0x00000004,
+    VK_QUERY_RESULT_PARTIAL_BIT = 0x00000008,
+    VK_QUERY_RESULT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkQueryResultFlagBits;
+typedef VkFlags VkQueryResultFlags;
+
+typedef enum VkBufferCreateFlagBits {
+    VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001,
+    VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002,
+    VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004,
+    VK_BUFFER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkBufferCreateFlagBits;
+typedef VkFlags VkBufferCreateFlags;
+
+typedef enum VkBufferUsageFlagBits {
+    VK_BUFFER_USAGE_TRANSFER_SRC_BIT = 0x00000001,
+    VK_BUFFER_USAGE_TRANSFER_DST_BIT = 0x00000002,
+    VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT = 0x00000004,
+    VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT = 0x00000008,
+    VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT = 0x00000010,
+    VK_BUFFER_USAGE_STORAGE_BUFFER_BIT = 0x00000020,
+    VK_BUFFER_USAGE_INDEX_BUFFER_BIT = 0x00000040,
+    VK_BUFFER_USAGE_VERTEX_BUFFER_BIT = 0x00000080,
+    VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT = 0x00000100,
+    VK_BUFFER_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkBufferUsageFlagBits;
+typedef VkFlags VkBufferUsageFlags;
+typedef VkFlags VkBufferViewCreateFlags;
+typedef VkFlags VkImageViewCreateFlags;
+typedef VkFlags VkShaderModuleCreateFlags;
+typedef VkFlags VkPipelineCacheCreateFlags;
+
+typedef enum VkPipelineCreateFlagBits {
+    VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001,
+    VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002,
+    VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004,
+    VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHX = 0x00000008,
+    VK_PIPELINE_CREATE_DISPATCH_BASE_KHX = 0x00000010,
+    VK_PIPELINE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkPipelineCreateFlagBits;
+typedef VkFlags VkPipelineCreateFlags;
+typedef VkFlags VkPipelineShaderStageCreateFlags;
+
+typedef enum VkShaderStageFlagBits {
+    VK_SHADER_STAGE_VERTEX_BIT = 0x00000001,
+    VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT = 0x00000002,
+    VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT = 0x00000004,
+    VK_SHADER_STAGE_GEOMETRY_BIT = 0x00000008,
+    VK_SHADER_STAGE_FRAGMENT_BIT = 0x00000010,
+    VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020,
+    VK_SHADER_STAGE_ALL_GRAPHICS = 0x0000001F,
+    VK_SHADER_STAGE_ALL = 0x7FFFFFFF,
+    VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkShaderStageFlagBits;
+typedef VkFlags VkPipelineVertexInputStateCreateFlags;
+typedef VkFlags VkPipelineInputAssemblyStateCreateFlags;
+typedef VkFlags VkPipelineTessellationStateCreateFlags;
+typedef VkFlags VkPipelineViewportStateCreateFlags;
+typedef VkFlags VkPipelineRasterizationStateCreateFlags;
+
+typedef enum VkCullModeFlagBits {
+    VK_CULL_MODE_NONE = 0,
+    VK_CULL_MODE_FRONT_BIT = 0x00000001,
+    VK_CULL_MODE_BACK_BIT = 0x00000002,
+    VK_CULL_MODE_FRONT_AND_BACK = 0x00000003,
+    VK_CULL_MODE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkCullModeFlagBits;
+typedef VkFlags VkCullModeFlags;
+typedef VkFlags VkPipelineMultisampleStateCreateFlags;
+typedef VkFlags VkPipelineDepthStencilStateCreateFlags;
+typedef VkFlags VkPipelineColorBlendStateCreateFlags;
+
+typedef enum VkColorComponentFlagBits {
+    VK_COLOR_COMPONENT_R_BIT = 0x00000001,
+    VK_COLOR_COMPONENT_G_BIT = 0x00000002,
+    VK_COLOR_COMPONENT_B_BIT = 0x00000004,
+    VK_COLOR_COMPONENT_A_BIT = 0x00000008,
+    VK_COLOR_COMPONENT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkColorComponentFlagBits;
+typedef VkFlags VkColorComponentFlags;
+typedef VkFlags VkPipelineDynamicStateCreateFlags;
+typedef VkFlags VkPipelineLayoutCreateFlags;
+typedef VkFlags VkShaderStageFlags;
+typedef VkFlags VkSamplerCreateFlags;
+
+typedef enum VkDescriptorSetLayoutCreateFlagBits {
+    VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR = 0x00000001,
+    VK_DESCRIPTOR_SET_LAYOUT_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkDescriptorSetLayoutCreateFlagBits;
+typedef VkFlags VkDescriptorSetLayoutCreateFlags;
+
+typedef enum VkDescriptorPoolCreateFlagBits {
+    VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT = 0x00000001,
+    VK_DESCRIPTOR_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkDescriptorPoolCreateFlagBits;
+typedef VkFlags VkDescriptorPoolCreateFlags;
+typedef VkFlags VkDescriptorPoolResetFlags;
+typedef VkFlags VkFramebufferCreateFlags;
+typedef VkFlags VkRenderPassCreateFlags;
+
+typedef enum VkAttachmentDescriptionFlagBits {
+    VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001,
+    VK_ATTACHMENT_DESCRIPTION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkAttachmentDescriptionFlagBits;
+typedef VkFlags VkAttachmentDescriptionFlags;
+
+typedef enum VkSubpassDescriptionFlagBits {
+    VK_SUBPASS_DESCRIPTION_PER_VIEW_ATTRIBUTES_BIT_NVX = 0x00000001,
+    VK_SUBPASS_DESCRIPTION_PER_VIEW_POSITION_X_ONLY_BIT_NVX = 0x00000002,
+    VK_SUBPASS_DESCRIPTION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkSubpassDescriptionFlagBits;
+typedef VkFlags VkSubpassDescriptionFlags;
+
+typedef enum VkAccessFlagBits {
+    VK_ACCESS_INDIRECT_COMMAND_READ_BIT = 0x00000001,
+    VK_ACCESS_INDEX_READ_BIT = 0x00000002,
+    VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT = 0x00000004,
+    VK_ACCESS_UNIFORM_READ_BIT = 0x00000008,
+    VK_ACCESS_INPUT_ATTACHMENT_READ_BIT = 0x00000010,
+    VK_ACCESS_SHADER_READ_BIT = 0x00000020,
+    VK_ACCESS_SHADER_WRITE_BIT = 0x00000040,
+    VK_ACCESS_COLOR_ATTACHMENT_READ_BIT = 0x00000080,
+    VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT = 0x00000100,
+    VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT = 0x00000200,
+    VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT = 0x00000400,
+    VK_ACCESS_TRANSFER_READ_BIT = 0x00000800,
+    VK_ACCESS_TRANSFER_WRITE_BIT = 0x00001000,
+    VK_ACCESS_HOST_READ_BIT = 0x00002000,
+    VK_ACCESS_HOST_WRITE_BIT = 0x00004000,
+    VK_ACCESS_MEMORY_READ_BIT = 0x00008000,
+    VK_ACCESS_MEMORY_WRITE_BIT = 0x00010000,
+    VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX = 0x00020000,
+    VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX = 0x00040000,
+    VK_ACCESS_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT = 0x00080000,
+    VK_ACCESS_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkAccessFlagBits;
+typedef VkFlags VkAccessFlags;
+
+typedef enum VkDependencyFlagBits {
+    VK_DEPENDENCY_BY_REGION_BIT = 0x00000001,
+    VK_DEPENDENCY_VIEW_LOCAL_BIT_KHX = 0x00000002,
+    VK_DEPENDENCY_DEVICE_GROUP_BIT_KHX = 0x00000004,
+    VK_DEPENDENCY_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkDependencyFlagBits;
+typedef VkFlags VkDependencyFlags;
+
+typedef enum VkCommandPoolCreateFlagBits {
+    VK_COMMAND_POOL_CREATE_TRANSIENT_BIT = 0x00000001,
+    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002,
+    VK_COMMAND_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkCommandPoolCreateFlagBits;
+typedef VkFlags VkCommandPoolCreateFlags;
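+
+// Editorial note (illustrative, not part of the upstream Khronos header):
+// each Vk*FlagBits enum names individual bits, and the matching VkFlags
+// typedef holds the OR'd mask. A compute-oriented embedder would typically
+// combine them like this:
+//
+//     VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+//                                VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+//                                VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+//     VkMemoryPropertyFlags staging = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+//                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;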
+
+typedef enum VkCommandPoolResetFlagBits {
+    VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT = 0x00000001,
+    VK_COMMAND_POOL_RESET_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkCommandPoolResetFlagBits;
+typedef VkFlags VkCommandPoolResetFlags;
+
+typedef enum VkCommandBufferUsageFlagBits {
+    VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT = 0x00000001,
+    VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT = 0x00000002,
+    VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT = 0x00000004,
+    VK_COMMAND_BUFFER_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkCommandBufferUsageFlagBits;
+typedef VkFlags VkCommandBufferUsageFlags;
+
+typedef enum VkQueryControlFlagBits {
+    VK_QUERY_CONTROL_PRECISE_BIT = 0x00000001,
+    VK_QUERY_CONTROL_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkQueryControlFlagBits;
+typedef VkFlags VkQueryControlFlags;
+
+typedef enum VkCommandBufferResetFlagBits {
+    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001,
+    VK_COMMAND_BUFFER_RESET_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkCommandBufferResetFlagBits;
+typedef VkFlags VkCommandBufferResetFlags;
+
+typedef enum VkStencilFaceFlagBits {
+    VK_STENCIL_FACE_FRONT_BIT = 0x00000001,
+    VK_STENCIL_FACE_BACK_BIT = 0x00000002,
+    VK_STENCIL_FRONT_AND_BACK = 0x00000003,
+    VK_STENCIL_FACE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
+} VkStencilFaceFlagBits;
+typedef VkFlags VkStencilFaceFlags;
+
+typedef struct VkApplicationInfo {
+    VkStructureType sType;
+    const void *pNext;
+    const char *pApplicationName;
+    uint32_t applicationVersion;
+    const char *pEngineName;
+    uint32_t engineVersion;
+    uint32_t apiVersion;
+} VkApplicationInfo;
+
+typedef struct VkInstanceCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkInstanceCreateFlags flags;
+    const VkApplicationInfo *pApplicationInfo;
+    uint32_t enabledLayerCount;
+    const char *const *ppEnabledLayerNames;
+    uint32_t enabledExtensionCount;
+    const char *const *ppEnabledExtensionNames;
+} VkInstanceCreateInfo;
+
+typedef void *(VKAPI_PTR *PFN_vkAllocationFunction)(
+    void *pUserData,
+    size_t size,
+    size_t alignment,
+    VkSystemAllocationScope allocationScope);
+
+typedef void *(VKAPI_PTR *PFN_vkReallocationFunction)(
+    void *pUserData,
+    void *pOriginal,
+    size_t size,
+    size_t alignment,
+    VkSystemAllocationScope allocationScope);
+
+typedef void(VKAPI_PTR *PFN_vkFreeFunction)(
+    void *pUserData,
+    void *pMemory);
+
+typedef void(VKAPI_PTR *PFN_vkInternalAllocationNotification)(
+    void *pUserData,
+    size_t size,
+    VkInternalAllocationType allocationType,
+    VkSystemAllocationScope allocationScope);
+
+typedef void(VKAPI_PTR *PFN_vkInternalFreeNotification)(
+    void *pUserData,
+    size_t size,
+    VkInternalAllocationType allocationType,
+    VkSystemAllocationScope allocationScope);
+
+typedef struct VkAllocationCallbacks {
+    void *pUserData;
+    PFN_vkAllocationFunction pfnAllocation;
+    PFN_vkReallocationFunction pfnReallocation;
+    PFN_vkFreeFunction pfnFree;
+    PFN_vkInternalAllocationNotification pfnInternalAllocation;
+    PFN_vkInternalFreeNotification pfnInternalFree;
+} VkAllocationCallbacks;
+
+typedef struct VkPhysicalDeviceFeatures {
+    VkBool32 robustBufferAccess;
+    VkBool32 fullDrawIndexUint32;
+    VkBool32 imageCubeArray;
+    VkBool32 independentBlend;
+    VkBool32 geometryShader;
+    VkBool32 tessellationShader;
+    VkBool32 sampleRateShading;
+    VkBool32 dualSrcBlend;
+    VkBool32 logicOp;
+    VkBool32 multiDrawIndirect;
+    VkBool32 drawIndirectFirstInstance;
+    VkBool32 depthClamp;
+    VkBool32 depthBiasClamp;
+    VkBool32 fillModeNonSolid;
+    VkBool32 depthBounds;
+    VkBool32 wideLines;
+    VkBool32 largePoints;
+    VkBool32 alphaToOne;
+    VkBool32 multiViewport;
+    VkBool32 samplerAnisotropy;
+    VkBool32 textureCompressionETC2;
+    VkBool32 textureCompressionASTC_LDR;
+    VkBool32 textureCompressionBC;
+    VkBool32 occlusionQueryPrecise;
+    VkBool32 pipelineStatisticsQuery;
+    VkBool32 vertexPipelineStoresAndAtomics;
+    VkBool32 fragmentStoresAndAtomics;
+    VkBool32 shaderTessellationAndGeometryPointSize;
+    VkBool32 shaderImageGatherExtended;
+    VkBool32 shaderStorageImageExtendedFormats;
+    VkBool32 shaderStorageImageMultisample;
+    VkBool32 shaderStorageImageReadWithoutFormat;
+    VkBool32 shaderStorageImageWriteWithoutFormat;
+    VkBool32 shaderUniformBufferArrayDynamicIndexing;
+    VkBool32 shaderSampledImageArrayDynamicIndexing;
+    VkBool32 shaderStorageBufferArrayDynamicIndexing;
+    VkBool32 shaderStorageImageArrayDynamicIndexing;
+    VkBool32 shaderClipDistance;
+    VkBool32 shaderCullDistance;
+    VkBool32 shaderFloat64;
+    VkBool32 shaderInt64;
+    VkBool32 shaderInt16;
+    VkBool32 shaderResourceResidency;
+    VkBool32 shaderResourceMinLod;
+    VkBool32 sparseBinding;
+    VkBool32 sparseResidencyBuffer;
+    VkBool32 sparseResidencyImage2D;
+    VkBool32 sparseResidencyImage3D;
+    VkBool32 sparseResidency2Samples;
+    VkBool32 sparseResidency4Samples;
+    VkBool32 sparseResidency8Samples;
+    VkBool32 sparseResidency16Samples;
+    VkBool32 sparseResidencyAliased;
+    VkBool32 variableMultisampleRate;
+    VkBool32 inheritedQueries;
+} VkPhysicalDeviceFeatures;
+
+typedef struct VkFormatProperties {
+    VkFormatFeatureFlags linearTilingFeatures;
+    VkFormatFeatureFlags optimalTilingFeatures;
+    VkFormatFeatureFlags bufferFeatures;
+} VkFormatProperties;
+
+typedef struct VkExtent3D {
+    uint32_t width;
+    uint32_t height;
+    uint32_t depth;
+} VkExtent3D;
+
+typedef struct VkImageFormatProperties {
+    VkExtent3D maxExtent;
+    uint32_t maxMipLevels;
+    uint32_t maxArrayLayers;
+    VkSampleCountFlags sampleCounts;
+    VkDeviceSize maxResourceSize;
+} VkImageFormatProperties;
+
+typedef struct VkPhysicalDeviceLimits {
+    uint32_t maxImageDimension1D;
+    uint32_t maxImageDimension2D;
+    uint32_t maxImageDimension3D;
+    uint32_t maxImageDimensionCube;
+    uint32_t maxImageArrayLayers;
+    uint32_t maxTexelBufferElements;
+    uint32_t maxUniformBufferRange;
+    uint32_t maxStorageBufferRange;
+    uint32_t maxPushConstantsSize;
+    uint32_t maxMemoryAllocationCount;
+    uint32_t maxSamplerAllocationCount;
+    VkDeviceSize bufferImageGranularity;
+    VkDeviceSize sparseAddressSpaceSize;
+    uint32_t maxBoundDescriptorSets;
+    uint32_t maxPerStageDescriptorSamplers;
+    uint32_t maxPerStageDescriptorUniformBuffers;
+    uint32_t maxPerStageDescriptorStorageBuffers;
+    uint32_t maxPerStageDescriptorSampledImages;
+    uint32_t maxPerStageDescriptorStorageImages;
+    uint32_t maxPerStageDescriptorInputAttachments;
+    uint32_t maxPerStageResources;
+    uint32_t maxDescriptorSetSamplers;
+    uint32_t maxDescriptorSetUniformBuffers;
+    uint32_t maxDescriptorSetUniformBuffersDynamic;
+    uint32_t maxDescriptorSetStorageBuffers;
+    uint32_t maxDescriptorSetStorageBuffersDynamic;
+    uint32_t maxDescriptorSetSampledImages;
+    uint32_t maxDescriptorSetStorageImages;
+    uint32_t maxDescriptorSetInputAttachments;
+    uint32_t maxVertexInputAttributes;
+    uint32_t maxVertexInputBindings;
+    uint32_t maxVertexInputAttributeOffset;
+    uint32_t maxVertexInputBindingStride;
+    uint32_t maxVertexOutputComponents;
+    uint32_t maxTessellationGenerationLevel;
+    uint32_t maxTessellationPatchSize;
+    uint32_t maxTessellationControlPerVertexInputComponents;
+    uint32_t maxTessellationControlPerVertexOutputComponents;
+    uint32_t maxTessellationControlPerPatchOutputComponents;
+    uint32_t maxTessellationControlTotalOutputComponents;
+    uint32_t maxTessellationEvaluationInputComponents;
+    uint32_t maxTessellationEvaluationOutputComponents;
+    uint32_t maxGeometryShaderInvocations;
+    uint32_t maxGeometryInputComponents;
+    uint32_t maxGeometryOutputComponents;
+    uint32_t maxGeometryOutputVertices;
+    uint32_t maxGeometryTotalOutputComponents;
+    uint32_t maxFragmentInputComponents;
+    uint32_t maxFragmentOutputAttachments;
+    uint32_t maxFragmentDualSrcAttachments;
+    uint32_t maxFragmentCombinedOutputResources;
+    uint32_t maxComputeSharedMemorySize;
+    uint32_t maxComputeWorkGroupCount[3];
+    uint32_t maxComputeWorkGroupInvocations;
+    uint32_t maxComputeWorkGroupSize[3];
+    uint32_t subPixelPrecisionBits;
+    uint32_t subTexelPrecisionBits;
+    uint32_t mipmapPrecisionBits;
+    uint32_t maxDrawIndexedIndexValue;
+    uint32_t maxDrawIndirectCount;
+    float maxSamplerLodBias;
+    float maxSamplerAnisotropy;
+    uint32_t maxViewports;
+    uint32_t maxViewportDimensions[2];
+    float viewportBoundsRange[2];
+    uint32_t viewportSubPixelBits;
+    size_t minMemoryMapAlignment;
+    VkDeviceSize minTexelBufferOffsetAlignment;
+    VkDeviceSize minUniformBufferOffsetAlignment;
+    VkDeviceSize minStorageBufferOffsetAlignment;
+    int32_t minTexelOffset;
+    uint32_t maxTexelOffset;
+    int32_t minTexelGatherOffset;
+    uint32_t maxTexelGatherOffset;
+    float minInterpolationOffset;
+    float maxInterpolationOffset;
+    uint32_t subPixelInterpolationOffsetBits;
+    uint32_t maxFramebufferWidth;
+    uint32_t maxFramebufferHeight;
+    uint32_t maxFramebufferLayers;
+    VkSampleCountFlags framebufferColorSampleCounts;
+    VkSampleCountFlags framebufferDepthSampleCounts;
+    VkSampleCountFlags framebufferStencilSampleCounts;
+    VkSampleCountFlags framebufferNoAttachmentsSampleCounts;
+    uint32_t maxColorAttachments;
+    VkSampleCountFlags sampledImageColorSampleCounts;
+    VkSampleCountFlags sampledImageIntegerSampleCounts;
+    VkSampleCountFlags sampledImageDepthSampleCounts;
+    VkSampleCountFlags sampledImageStencilSampleCounts;
+    VkSampleCountFlags storageImageSampleCounts;
+    uint32_t maxSampleMaskWords;
+    VkBool32 timestampComputeAndGraphics;
+    float timestampPeriod;
+    uint32_t maxClipDistances;
+    uint32_t maxCullDistances;
+    uint32_t maxCombinedClipAndCullDistances;
+    uint32_t discreteQueuePriorities;
+    float pointSizeRange[2];
+    float lineWidthRange[2];
+    float pointSizeGranularity;
+    float lineWidthGranularity;
+    VkBool32 strictLines;
+    VkBool32 standardSampleLocations;
+    VkDeviceSize optimalBufferCopyOffsetAlignment;
+    VkDeviceSize optimalBufferCopyRowPitchAlignment;
+    VkDeviceSize nonCoherentAtomSize;
+} VkPhysicalDeviceLimits;
+
+typedef struct VkPhysicalDeviceSparseProperties {
+    VkBool32 residencyStandard2DBlockShape;
+    VkBool32 residencyStandard2DMultisampleBlockShape;
+    VkBool32 residencyStandard3DBlockShape;
+    VkBool32 residencyAlignedMipSize;
+    VkBool32 residencyNonResidentStrict;
+} VkPhysicalDeviceSparseProperties;
+
+typedef struct VkPhysicalDeviceProperties {
+    uint32_t apiVersion;
+    uint32_t driverVersion;
+    uint32_t vendorID;
+    uint32_t deviceID;
+    VkPhysicalDeviceType deviceType;
+    char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
+    uint8_t pipelineCacheUUID[VK_UUID_SIZE];
+    VkPhysicalDeviceLimits limits;
+    VkPhysicalDeviceSparseProperties sparseProperties;
+} VkPhysicalDeviceProperties;
+
+typedef struct VkQueueFamilyProperties {
+    VkQueueFlags queueFlags;
+    uint32_t queueCount;
+    uint32_t timestampValidBits;
+    VkExtent3D minImageTransferGranularity;
+} VkQueueFamilyProperties;
+
+typedef struct VkMemoryType {
+    VkMemoryPropertyFlags propertyFlags;
+    uint32_t heapIndex;
+} VkMemoryType;
+
+typedef struct VkMemoryHeap {
+    VkDeviceSize size;
+    VkMemoryHeapFlags flags;
+} VkMemoryHeap;
+
+typedef struct VkPhysicalDeviceMemoryProperties {
+    uint32_t memoryTypeCount;
+    VkMemoryType memoryTypes[VK_MAX_MEMORY_TYPES];
+    uint32_t memoryHeapCount;
+    VkMemoryHeap memoryHeaps[VK_MAX_MEMORY_HEAPS];
+} VkPhysicalDeviceMemoryProperties;
+
+typedef void(VKAPI_PTR *PFN_vkVoidFunction)(void);
+typedef struct VkDeviceQueueCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkDeviceQueueCreateFlags flags;
+    uint32_t queueFamilyIndex;
+    uint32_t queueCount;
+    const float *pQueuePriorities;
+} VkDeviceQueueCreateInfo;
+
+typedef struct VkDeviceCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkDeviceCreateFlags flags;
+    uint32_t queueCreateInfoCount;
+    const VkDeviceQueueCreateInfo *pQueueCreateInfos;
+    uint32_t enabledLayerCount;
+    const char *const *ppEnabledLayerNames;
+    uint32_t enabledExtensionCount;
+    const char *const *ppEnabledExtensionNames;
+    const VkPhysicalDeviceFeatures *pEnabledFeatures;
+} VkDeviceCreateInfo;
+
+typedef struct VkExtensionProperties {
+    char extensionName[VK_MAX_EXTENSION_NAME_SIZE];
+    uint32_t specVersion;
+} VkExtensionProperties;
+
+typedef struct VkLayerProperties {
+    char layerName[VK_MAX_EXTENSION_NAME_SIZE];
+    uint32_t specVersion;
+    uint32_t implementationVersion;
+    char description[VK_MAX_DESCRIPTION_SIZE];
+} VkLayerProperties;
+
+typedef struct VkSubmitInfo {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t waitSemaphoreCount;
+    const VkSemaphore *pWaitSemaphores;
+    const VkPipelineStageFlags *pWaitDstStageMask;
+    uint32_t commandBufferCount;
+    const VkCommandBuffer *pCommandBuffers;
+    uint32_t signalSemaphoreCount;
+    const VkSemaphore *pSignalSemaphores;
+} VkSubmitInfo;
+
+typedef struct VkMemoryAllocateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkDeviceSize allocationSize;
+    uint32_t memoryTypeIndex;
+} VkMemoryAllocateInfo;
+
+typedef struct VkMappedMemoryRange {
+    VkStructureType sType;
+    const void *pNext;
+    VkDeviceMemory memory;
+    VkDeviceSize offset;
+    VkDeviceSize size;
+} VkMappedMemoryRange;
+
+typedef struct VkMemoryRequirements {
+    VkDeviceSize size;
+    VkDeviceSize alignment;
+    uint32_t memoryTypeBits;
+} VkMemoryRequirements;
+
+typedef struct VkSparseImageFormatProperties {
+    VkImageAspectFlags aspectMask;
+    VkExtent3D imageGranularity;
+    VkSparseImageFormatFlags flags;
+} VkSparseImageFormatProperties;
+
+typedef struct VkSparseImageMemoryRequirements {
+    VkSparseImageFormatProperties formatProperties;
+    uint32_t imageMipTailFirstLod;
+    VkDeviceSize imageMipTailSize;
+    VkDeviceSize imageMipTailOffset;
+    VkDeviceSize imageMipTailStride;
+} VkSparseImageMemoryRequirements;
+
+typedef struct VkSparseMemoryBind {
+    VkDeviceSize resourceOffset;
+    VkDeviceSize size;
+    VkDeviceMemory memory;
+    VkDeviceSize memoryOffset;
+    VkSparseMemoryBindFlags flags;
+} VkSparseMemoryBind;
+
+typedef struct VkSparseBufferMemoryBindInfo {
+    VkBuffer buffer;
+    uint32_t bindCount;
+    const VkSparseMemoryBind *pBinds;
+} VkSparseBufferMemoryBindInfo;
+
+typedef struct VkSparseImageOpaqueMemoryBindInfo {
+    VkImage image;
+    uint32_t bindCount;
+    const VkSparseMemoryBind *pBinds;
+} VkSparseImageOpaqueMemoryBindInfo;
+
+typedef struct VkImageSubresource {
+    VkImageAspectFlags aspectMask;
+    uint32_t mipLevel;
+    uint32_t arrayLayer;
+} VkImageSubresource;
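+
+// Editorial note (illustrative sketch, not part of the upstream Khronos
+// header): VkPhysicalDeviceMemoryProperties above is what an embedder scans
+// to pick a memoryTypeIndex for VkMemoryAllocateInfo. The helper name
+// `find_memory_type` is hypothetical; the loop itself is the standard idiom:
+//
+//     int32_t find_memory_type(const VkPhysicalDeviceMemoryProperties *props,
+//                              uint32_t type_bits, VkMemoryPropertyFlags want) {
+//         for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
+//             // type_bits comes from VkMemoryRequirements::memoryTypeBits
+//             if ((type_bits & (1u << i)) &&
+//                 (props->memoryTypes[i].propertyFlags & want) == want) {
+//                 return (int32_t)i;
+//             }
+//         }
+//         return -1;  // no matching memory type
+//     }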
+
+typedef struct VkOffset3D {
+    int32_t x;
+    int32_t y;
+    int32_t z;
+} VkOffset3D;
+
+typedef struct VkSparseImageMemoryBind {
+    VkImageSubresource subresource;
+    VkOffset3D offset;
+    VkExtent3D extent;
+    VkDeviceMemory memory;
+    VkDeviceSize memoryOffset;
+    VkSparseMemoryBindFlags flags;
+} VkSparseImageMemoryBind;
+
+typedef struct VkSparseImageMemoryBindInfo {
+    VkImage image;
+    uint32_t bindCount;
+    const VkSparseImageMemoryBind *pBinds;
+} VkSparseImageMemoryBindInfo;
+
+typedef struct VkBindSparseInfo {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t waitSemaphoreCount;
+    const VkSemaphore *pWaitSemaphores;
+    uint32_t bufferBindCount;
+    const VkSparseBufferMemoryBindInfo *pBufferBinds;
+    uint32_t imageOpaqueBindCount;
+    const VkSparseImageOpaqueMemoryBindInfo *pImageOpaqueBinds;
+    uint32_t imageBindCount;
+    const VkSparseImageMemoryBindInfo *pImageBinds;
+    uint32_t signalSemaphoreCount;
+    const VkSemaphore *pSignalSemaphores;
+} VkBindSparseInfo;
+
+typedef struct VkFenceCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkFenceCreateFlags flags;
+} VkFenceCreateInfo;
+
+typedef struct VkSemaphoreCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkSemaphoreCreateFlags flags;
+} VkSemaphoreCreateInfo;
+
+typedef struct VkEventCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkEventCreateFlags flags;
+} VkEventCreateInfo;
+
+typedef struct VkQueryPoolCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkQueryPoolCreateFlags flags;
+    VkQueryType queryType;
+    uint32_t queryCount;
+    VkQueryPipelineStatisticFlags pipelineStatistics;
+} VkQueryPoolCreateInfo;
+
+typedef struct VkBufferCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkBufferCreateFlags flags;
+    VkDeviceSize size;
+    VkBufferUsageFlags usage;
+    VkSharingMode sharingMode;
+    uint32_t queueFamilyIndexCount;
+    const uint32_t *pQueueFamilyIndices;
+} VkBufferCreateInfo;
+
+typedef struct VkBufferViewCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkBufferViewCreateFlags flags;
+    VkBuffer buffer;
+    VkFormat format;
+    VkDeviceSize offset;
+    VkDeviceSize range;
+} VkBufferViewCreateInfo;
+
+typedef struct VkImageCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkImageCreateFlags flags;
+    VkImageType imageType;
+    VkFormat format;
+    VkExtent3D extent;
+    uint32_t mipLevels;
+    uint32_t arrayLayers;
+    VkSampleCountFlagBits samples;
+    VkImageTiling tiling;
+    VkImageUsageFlags usage;
+    VkSharingMode sharingMode;
+    uint32_t queueFamilyIndexCount;
+    const uint32_t *pQueueFamilyIndices;
+    VkImageLayout initialLayout;
+} VkImageCreateInfo;
+
+typedef struct VkSubresourceLayout {
+    VkDeviceSize offset;
+    VkDeviceSize size;
+    VkDeviceSize rowPitch;
+    VkDeviceSize arrayPitch;
+    VkDeviceSize depthPitch;
+} VkSubresourceLayout;
+
+typedef struct VkComponentMapping {
+    VkComponentSwizzle r;
+    VkComponentSwizzle g;
+    VkComponentSwizzle b;
+    VkComponentSwizzle a;
+} VkComponentMapping;
+
+typedef struct VkImageSubresourceRange {
+    VkImageAspectFlags aspectMask;
+    uint32_t baseMipLevel;
+    uint32_t levelCount;
+    uint32_t baseArrayLayer;
+    uint32_t layerCount;
+} VkImageSubresourceRange;
+
+typedef struct VkImageViewCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkImageViewCreateFlags flags;
+    VkImage image;
+    VkImageViewType viewType;
+    VkFormat format;
+    VkComponentMapping components;
+    VkImageSubresourceRange subresourceRange;
+} VkImageViewCreateInfo;
+
+typedef struct VkShaderModuleCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkShaderModuleCreateFlags flags;
+    size_t codeSize;
+    const uint32_t *pCode;
+} VkShaderModuleCreateInfo;
+
+typedef struct VkPipelineCacheCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineCacheCreateFlags flags;
+    size_t initialDataSize;
+    const void *pInitialData;
+} VkPipelineCacheCreateInfo;
+
+typedef struct VkSpecializationMapEntry {
+    uint32_t constantID;
+    uint32_t offset;
+    size_t size;
+} VkSpecializationMapEntry;
+
+typedef struct VkSpecializationInfo {
+    uint32_t mapEntryCount;
+    const VkSpecializationMapEntry *pMapEntries;
+    size_t dataSize;
+    const void *pData;
+} VkSpecializationInfo;
+
+typedef struct VkPipelineShaderStageCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineShaderStageCreateFlags flags;
+    VkShaderStageFlagBits stage;
+    VkShaderModule module;
+    const char *pName;
+    const VkSpecializationInfo *pSpecializationInfo;
+} VkPipelineShaderStageCreateInfo;
+
+typedef struct VkVertexInputBindingDescription {
+    uint32_t binding;
+    uint32_t stride;
+    VkVertexInputRate inputRate;
+} VkVertexInputBindingDescription;
+
+typedef struct VkVertexInputAttributeDescription {
+    uint32_t location;
+    uint32_t binding;
+    VkFormat format;
+    uint32_t offset;
+} VkVertexInputAttributeDescription;
+
+typedef struct VkPipelineVertexInputStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineVertexInputStateCreateFlags flags;
+    uint32_t vertexBindingDescriptionCount;
+    const VkVertexInputBindingDescription *pVertexBindingDescriptions;
+    uint32_t vertexAttributeDescriptionCount;
+    const VkVertexInputAttributeDescription *pVertexAttributeDescriptions;
+} VkPipelineVertexInputStateCreateInfo;
+
+typedef struct VkPipelineInputAssemblyStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineInputAssemblyStateCreateFlags flags;
+    VkPrimitiveTopology topology;
+    VkBool32 primitiveRestartEnable;
+} VkPipelineInputAssemblyStateCreateInfo;
+
+typedef struct VkPipelineTessellationStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineTessellationStateCreateFlags flags;
+    uint32_t patchControlPoints;
+} VkPipelineTessellationStateCreateInfo;
+
+typedef struct VkViewport {
+    float x;
+    float y;
+    float width;
+    float height;
+    float minDepth;
+    float maxDepth;
+} VkViewport;
+
+typedef struct VkOffset2D {
+    int32_t x;
+    int32_t y;
+} VkOffset2D;
+
+typedef struct VkExtent2D {
+    uint32_t width;
+    uint32_t height;
+} VkExtent2D;
+
+typedef struct VkRect2D {
+    VkOffset2D offset;
+    VkExtent2D extent;
+} VkRect2D;
+
+typedef struct VkPipelineViewportStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineViewportStateCreateFlags flags;
+    uint32_t viewportCount;
+    const VkViewport *pViewports;
+    uint32_t scissorCount;
+    const VkRect2D *pScissors;
+} VkPipelineViewportStateCreateInfo;
+
+typedef struct VkPipelineRasterizationStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineRasterizationStateCreateFlags flags;
+    VkBool32 depthClampEnable;
+    VkBool32 rasterizerDiscardEnable;
+    VkPolygonMode polygonMode;
+    VkCullModeFlags cullMode;
+    VkFrontFace frontFace;
+    VkBool32 depthBiasEnable;
+    float depthBiasConstantFactor;
+    float depthBiasClamp;
+    float depthBiasSlopeFactor;
+    float lineWidth;
+} VkPipelineRasterizationStateCreateInfo;
+
+typedef struct VkPipelineMultisampleStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineMultisampleStateCreateFlags flags;
+    VkSampleCountFlagBits rasterizationSamples;
+    VkBool32 sampleShadingEnable;
+    float minSampleShading;
+    const VkSampleMask *pSampleMask;
+    VkBool32 alphaToCoverageEnable;
+    VkBool32 alphaToOneEnable;
+} VkPipelineMultisampleStateCreateInfo;
+
+typedef struct VkStencilOpState {
+    VkStencilOp failOp;
+    VkStencilOp passOp;
+    VkStencilOp depthFailOp;
+    VkCompareOp compareOp;
+    uint32_t compareMask;
+    uint32_t writeMask;
+    uint32_t reference;
+} VkStencilOpState;
+
+typedef struct VkPipelineDepthStencilStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineDepthStencilStateCreateFlags flags;
+    VkBool32 depthTestEnable;
+    VkBool32 depthWriteEnable;
+    VkCompareOp depthCompareOp;
+    VkBool32 depthBoundsTestEnable;
+    VkBool32 stencilTestEnable;
+    VkStencilOpState front;
+    VkStencilOpState back;
+    float minDepthBounds;
+    float maxDepthBounds;
+} VkPipelineDepthStencilStateCreateInfo;
+
+typedef struct VkPipelineColorBlendAttachmentState {
+    VkBool32 blendEnable;
+    VkBlendFactor srcColorBlendFactor;
+    VkBlendFactor dstColorBlendFactor;
+    VkBlendOp colorBlendOp;
+    VkBlendFactor srcAlphaBlendFactor;
+    VkBlendFactor dstAlphaBlendFactor;
+    VkBlendOp alphaBlendOp;
+    VkColorComponentFlags colorWriteMask;
+} VkPipelineColorBlendAttachmentState;
+
+typedef struct VkPipelineColorBlendStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineColorBlendStateCreateFlags flags;
+    VkBool32 logicOpEnable;
+    VkLogicOp logicOp;
+    uint32_t attachmentCount;
+    const VkPipelineColorBlendAttachmentState *pAttachments;
+    float blendConstants[4];
+} VkPipelineColorBlendStateCreateInfo;
+
+typedef struct VkPipelineDynamicStateCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineDynamicStateCreateFlags flags;
+    uint32_t dynamicStateCount;
+    const VkDynamicState *pDynamicStates;
+} VkPipelineDynamicStateCreateInfo;
+
+typedef struct VkGraphicsPipelineCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineCreateFlags flags;
+    uint32_t stageCount;
+    const VkPipelineShaderStageCreateInfo *pStages;
+    const VkPipelineVertexInputStateCreateInfo *pVertexInputState;
+    const VkPipelineInputAssemblyStateCreateInfo *pInputAssemblyState;
+    const VkPipelineTessellationStateCreateInfo *pTessellationState;
+    const VkPipelineViewportStateCreateInfo *pViewportState;
+    const VkPipelineRasterizationStateCreateInfo *pRasterizationState;
+    const VkPipelineMultisampleStateCreateInfo *pMultisampleState;
+    const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState;
+    const VkPipelineColorBlendStateCreateInfo *pColorBlendState;
+    const VkPipelineDynamicStateCreateInfo *pDynamicState;
+    VkPipelineLayout layout;
+    VkRenderPass renderPass;
+    uint32_t subpass;
+    VkPipeline basePipelineHandle;
+    int32_t basePipelineIndex;
+} VkGraphicsPipelineCreateInfo;
+
+typedef struct VkComputePipelineCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineCreateFlags flags;
+    VkPipelineShaderStageCreateInfo stage;
+    VkPipelineLayout layout;
+    VkPipeline basePipelineHandle;
+    int32_t basePipelineIndex;
+} VkComputePipelineCreateInfo;
+
+typedef struct VkPushConstantRange {
+    VkShaderStageFlags stageFlags;
+    uint32_t offset;
+    uint32_t size;
+} VkPushConstantRange;
+
+typedef struct VkPipelineLayoutCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineLayoutCreateFlags flags;
+    uint32_t setLayoutCount;
+    const VkDescriptorSetLayout *pSetLayouts;
+    uint32_t pushConstantRangeCount;
+    const VkPushConstantRange *pPushConstantRanges;
+} VkPipelineLayoutCreateInfo;
+
+typedef struct VkSamplerCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkSamplerCreateFlags flags;
+    VkFilter magFilter;
+    VkFilter minFilter;
+    VkSamplerMipmapMode mipmapMode;
+    VkSamplerAddressMode addressModeU;
+    VkSamplerAddressMode addressModeV;
+    VkSamplerAddressMode addressModeW;
+    float mipLodBias;
+    VkBool32 anisotropyEnable;
+    float maxAnisotropy;
+    VkBool32 compareEnable;
+    VkCompareOp compareOp;
+    float minLod;
+    float maxLod;
+    VkBorderColor borderColor;
+    VkBool32 unnormalizedCoordinates;
+} VkSamplerCreateInfo;
+
+typedef struct VkDescriptorSetLayoutBinding {
+    uint32_t binding;
+    VkDescriptorType descriptorType;
+    uint32_t descriptorCount;
+    VkShaderStageFlags stageFlags;
+    const VkSampler *pImmutableSamplers;
+} VkDescriptorSetLayoutBinding;
+
+typedef struct VkDescriptorSetLayoutCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkDescriptorSetLayoutCreateFlags flags;
+    uint32_t bindingCount;
+    const VkDescriptorSetLayoutBinding *pBindings;
+} VkDescriptorSetLayoutCreateInfo;
+
+typedef struct VkDescriptorPoolSize {
+    VkDescriptorType type;
+    uint32_t descriptorCount;
+} VkDescriptorPoolSize;
+
+typedef struct VkDescriptorPoolCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkDescriptorPoolCreateFlags flags;
+    uint32_t maxSets;
+    uint32_t poolSizeCount;
+    const VkDescriptorPoolSize *pPoolSizes;
+} VkDescriptorPoolCreateInfo;
+
+typedef struct VkDescriptorSetAllocateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkDescriptorPool descriptorPool;
+    uint32_t descriptorSetCount;
+    const VkDescriptorSetLayout *pSetLayouts;
+} VkDescriptorSetAllocateInfo;
+
+typedef struct VkDescriptorImageInfo {
+    VkSampler sampler;
+    VkImageView imageView;
+    VkImageLayout imageLayout;
+} VkDescriptorImageInfo;
+
+typedef struct VkDescriptorBufferInfo {
+    VkBuffer buffer;
+    VkDeviceSize offset;
+    VkDeviceSize range;
+} VkDescriptorBufferInfo;
+
+typedef struct VkWriteDescriptorSet {
+    VkStructureType sType;
+    const void *pNext;
+    VkDescriptorSet dstSet;
+    uint32_t dstBinding;
+    uint32_t dstArrayElement;
+    uint32_t descriptorCount;
+    VkDescriptorType descriptorType;
+    const VkDescriptorImageInfo *pImageInfo;
+    const VkDescriptorBufferInfo *pBufferInfo;
+    const VkBufferView *pTexelBufferView;
+} VkWriteDescriptorSet;
+
+typedef struct VkCopyDescriptorSet {
+    VkStructureType sType;
+    const void *pNext;
+    VkDescriptorSet srcSet;
+    uint32_t srcBinding;
+    uint32_t srcArrayElement;
+    VkDescriptorSet dstSet;
+    uint32_t dstBinding;
+    uint32_t dstArrayElement;
+    uint32_t descriptorCount;
+} VkCopyDescriptorSet;
+
+typedef struct VkFramebufferCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkFramebufferCreateFlags flags;
+    VkRenderPass renderPass;
+    uint32_t attachmentCount;
+    const VkImageView *pAttachments;
+    uint32_t width;
+    uint32_t height;
+    uint32_t layers;
+} VkFramebufferCreateInfo;
+
+typedef struct VkAttachmentDescription {
+    VkAttachmentDescriptionFlags flags;
+    VkFormat format;
+    VkSampleCountFlagBits samples;
+    VkAttachmentLoadOp loadOp;
+    VkAttachmentStoreOp storeOp;
+    VkAttachmentLoadOp stencilLoadOp;
+    VkAttachmentStoreOp stencilStoreOp;
+    VkImageLayout initialLayout;
+    VkImageLayout finalLayout;
+} VkAttachmentDescription;
+
+typedef struct VkAttachmentReference {
+    uint32_t attachment;
+    VkImageLayout layout;
+} VkAttachmentReference;
+
+typedef struct VkSubpassDescription {
+    VkSubpassDescriptionFlags flags;
+    VkPipelineBindPoint pipelineBindPoint;
+    uint32_t inputAttachmentCount;
+    const VkAttachmentReference *pInputAttachments;
+    uint32_t colorAttachmentCount;
+    const VkAttachmentReference *pColorAttachments;
+    const VkAttachmentReference *pResolveAttachments;
+    const VkAttachmentReference *pDepthStencilAttachment;
+    uint32_t preserveAttachmentCount;
+    const uint32_t *pPreserveAttachments;
+} VkSubpassDescription;
+
+typedef struct VkSubpassDependency {
+    uint32_t srcSubpass;
+    uint32_t dstSubpass;
+    VkPipelineStageFlags srcStageMask;
+    VkPipelineStageFlags dstStageMask;
+    VkAccessFlags srcAccessMask;
+    VkAccessFlags dstAccessMask;
+    VkDependencyFlags dependencyFlags;
+} VkSubpassDependency;
+
+typedef struct VkRenderPassCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkRenderPassCreateFlags flags;
+    uint32_t attachmentCount;
+    const VkAttachmentDescription *pAttachments;
+    uint32_t subpassCount;
+    const VkSubpassDescription *pSubpasses;
+    uint32_t dependencyCount;
+    const VkSubpassDependency *pDependencies;
+} VkRenderPassCreateInfo;
+
+typedef struct VkCommandPoolCreateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkCommandPoolCreateFlags flags;
+    uint32_t queueFamilyIndex;
+} VkCommandPoolCreateInfo;
+
+typedef struct VkCommandBufferAllocateInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkCommandPool commandPool;
+    VkCommandBufferLevel level;
+    uint32_t commandBufferCount;
+} VkCommandBufferAllocateInfo;
+
+typedef struct VkCommandBufferInheritanceInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkRenderPass renderPass;
+    uint32_t subpass;
+    VkFramebuffer framebuffer;
+    VkBool32 occlusionQueryEnable;
+    VkQueryControlFlags queryFlags;
+    VkQueryPipelineStatisticFlags pipelineStatistics;
+} VkCommandBufferInheritanceInfo;
+
+typedef struct VkCommandBufferBeginInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkCommandBufferUsageFlags flags;
+    const VkCommandBufferInheritanceInfo *pInheritanceInfo;
+} VkCommandBufferBeginInfo;
+
+typedef struct VkBufferCopy {
+    VkDeviceSize srcOffset;
+    VkDeviceSize dstOffset;
+    VkDeviceSize size;
+} VkBufferCopy;
+
+typedef struct VkImageSubresourceLayers {
+    VkImageAspectFlags aspectMask;
+    uint32_t mipLevel;
+    uint32_t baseArrayLayer;
+    uint32_t layerCount;
+} VkImageSubresourceLayers;
+
+typedef struct VkImageCopy {
+    VkImageSubresourceLayers srcSubresource;
+    VkOffset3D srcOffset;
+    VkImageSubresourceLayers dstSubresource;
+    VkOffset3D dstOffset;
+    VkExtent3D extent;
+} VkImageCopy;
+
+typedef struct VkImageBlit {
+    VkImageSubresourceLayers srcSubresource;
+    VkOffset3D srcOffsets[2];
+    VkImageSubresourceLayers dstSubresource;
+    VkOffset3D dstOffsets[2];
+} VkImageBlit;
+
+typedef struct VkBufferImageCopy {
+    VkDeviceSize bufferOffset;
+    uint32_t bufferRowLength;
+    uint32_t bufferImageHeight;
+    VkImageSubresourceLayers imageSubresource;
+    VkOffset3D imageOffset;
+    VkExtent3D imageExtent;
+} VkBufferImageCopy;
+
+typedef union VkClearColorValue {
+    float float32[4];
+    int32_t int32[4];
+    uint32_t uint32[4];
+} VkClearColorValue;
+
+typedef struct VkClearDepthStencilValue {
+    float depth;
+    uint32_t stencil;
+} VkClearDepthStencilValue;
+
+typedef union VkClearValue {
+    VkClearColorValue color;
+    VkClearDepthStencilValue depthStencil;
+} VkClearValue;
+
+typedef struct VkClearAttachment {
+    VkImageAspectFlags aspectMask;
+    uint32_t colorAttachment;
+    VkClearValue clearValue;
+} VkClearAttachment;
+
+typedef struct VkClearRect {
+    VkRect2D rect;
+    uint32_t baseArrayLayer;
+    uint32_t layerCount;
+} VkClearRect;
+
+typedef struct VkImageResolve {
+    VkImageSubresourceLayers srcSubresource;
+    VkOffset3D srcOffset;
+    VkImageSubresourceLayers dstSubresource;
+    VkOffset3D dstOffset;
+    VkExtent3D extent;
+} VkImageResolve;
+
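+// Editorial note (illustrative sketch, not part of the upstream Khronos
+// header): for a compute-only client, the create-info chain above reduces to
+// three structs. Names such as `spirv_words`, `spirv_size_in_bytes`,
+// `shader_module`, `pipeline_layout`, and "kernel_main" are hypothetical
+// placeholders:
+//
+//     VkShaderModuleCreateInfo module_info = {
+//         VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, nullptr, 0,
+//         spirv_size_in_bytes, spirv_words};
+//     VkPipelineShaderStageCreateInfo stage = {
+//         VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, nullptr, 0,
+//         VK_SHADER_STAGE_COMPUTE_BIT, shader_module, "kernel_main", nullptr};
+//     VkComputePipelineCreateInfo pipeline_info = {
+//         VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, nullptr, 0,
+//         stage, pipeline_layout, VK_NULL_HANDLE, 0};
+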
+typedef struct VkMemoryBarrier {
+    VkStructureType sType;
+    const void *pNext;
+    VkAccessFlags srcAccessMask;
+    VkAccessFlags dstAccessMask;
+} VkMemoryBarrier;
+
+typedef struct VkBufferMemoryBarrier {
+    VkStructureType sType;
+    const void *pNext;
+    VkAccessFlags srcAccessMask;
+    VkAccessFlags dstAccessMask;
+    uint32_t srcQueueFamilyIndex;
+    uint32_t dstQueueFamilyIndex;
+    VkBuffer buffer;
+    VkDeviceSize offset;
+    VkDeviceSize size;
+} VkBufferMemoryBarrier;
+
+typedef struct VkImageMemoryBarrier {
+    VkStructureType sType;
+    const void *pNext;
+    VkAccessFlags srcAccessMask;
+    VkAccessFlags dstAccessMask;
+    VkImageLayout oldLayout;
+    VkImageLayout newLayout;
+    uint32_t srcQueueFamilyIndex;
+    uint32_t dstQueueFamilyIndex;
+    VkImage image;
+    VkImageSubresourceRange subresourceRange;
+} VkImageMemoryBarrier;
+
+typedef struct VkRenderPassBeginInfo {
+    VkStructureType sType;
+    const void *pNext;
+    VkRenderPass renderPass;
+    VkFramebuffer framebuffer;
+    VkRect2D renderArea;
+    uint32_t clearValueCount;
+    const VkClearValue *pClearValues;
+} VkRenderPassBeginInfo;
+
+typedef struct VkDispatchIndirectCommand {
+    uint32_t x;
+    uint32_t y;
+    uint32_t z;
+} VkDispatchIndirectCommand;
+
+typedef struct VkDrawIndexedIndirectCommand {
+    uint32_t indexCount;
+    uint32_t instanceCount;
+    uint32_t firstIndex;
+    int32_t vertexOffset;
+    uint32_t firstInstance;
+} VkDrawIndexedIndirectCommand;
+
+typedef struct VkDrawIndirectCommand {
+    uint32_t vertexCount;
+    uint32_t instanceCount;
+    uint32_t firstVertex;
+    uint32_t firstInstance;
+} VkDrawIndirectCommand;
+
+typedef VkResult(VKAPI_PTR *PFN_vkCreateInstance)(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkInstance *pInstance);
+typedef void(VKAPI_PTR *PFN_vkDestroyInstance)(VkInstance instance, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkEnumeratePhysicalDevices)(VkInstance instance, uint32_t *pPhysicalDeviceCount, VkPhysicalDevice *pPhysicalDevices);
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures);
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties *pFormatProperties);
+typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkImageFormatProperties *pImageFormatProperties);
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties *pProperties);
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties)(VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, VkQueueFamilyProperties *pQueueFamilyProperties);
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceMemoryProperties)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties *pMemoryProperties);
+typedef PFN_vkVoidFunction(VKAPI_PTR *PFN_vkGetInstanceProcAddr)(VkInstance instance, const char *pName);
+typedef PFN_vkVoidFunction(VKAPI_PTR *PFN_vkGetDeviceProcAddr)(VkDevice device, const char *pName);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateDevice)(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDevice *pDevice);
+typedef void(VKAPI_PTR *PFN_vkDestroyDevice)(VkDevice device, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkEnumerateInstanceExtensionProperties)(const char *pLayerName, uint32_t *pPropertyCount, VkExtensionProperties *pProperties);
+typedef VkResult(VKAPI_PTR *PFN_vkEnumerateDeviceExtensionProperties)(VkPhysicalDevice physicalDevice, const char *pLayerName, uint32_t *pPropertyCount, VkExtensionProperties *pProperties);
+typedef VkResult(VKAPI_PTR *PFN_vkEnumerateInstanceLayerProperties)(uint32_t *pPropertyCount, VkLayerProperties *pProperties);
+typedef VkResult(VKAPI_PTR *PFN_vkEnumerateDeviceLayerProperties)(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount, VkLayerProperties *pProperties);
+typedef void(VKAPI_PTR *PFN_vkGetDeviceQueue)(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex, VkQueue *pQueue);
+typedef VkResult(VKAPI_PTR *PFN_vkQueueSubmit)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence);
+typedef VkResult(VKAPI_PTR *PFN_vkQueueWaitIdle)(VkQueue queue);
+typedef VkResult(VKAPI_PTR *PFN_vkDeviceWaitIdle)(VkDevice device);
+typedef VkResult(VKAPI_PTR *PFN_vkAllocateMemory)(VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo, const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMemory);
+typedef void(VKAPI_PTR *PFN_vkFreeMemory)(VkDevice device, VkDeviceMemory memory, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkMapMemory)(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void **ppData);
+typedef void(VKAPI_PTR *PFN_vkUnmapMemory)(VkDevice device, VkDeviceMemory memory);
+typedef VkResult(VKAPI_PTR *PFN_vkFlushMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges);
+typedef VkResult(VKAPI_PTR *PFN_vkInvalidateMappedMemoryRanges)(VkDevice device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges);
+typedef void(VKAPI_PTR *PFN_vkGetDeviceMemoryCommitment)(VkDevice device, VkDeviceMemory memory, VkDeviceSize *pCommittedMemoryInBytes);
+typedef VkResult(VKAPI_PTR *PFN_vkBindBufferMemory)(VkDevice device, VkBuffer buffer, VkDeviceMemory memory, VkDeviceSize memoryOffset);
+typedef VkResult(VKAPI_PTR *PFN_vkBindImageMemory)(VkDevice device, VkImage image, VkDeviceMemory memory, VkDeviceSize memoryOffset);
+typedef void(VKAPI_PTR *PFN_vkGetBufferMemoryRequirements)(VkDevice device, VkBuffer buffer, VkMemoryRequirements *pMemoryRequirements);
+typedef void(VKAPI_PTR *PFN_vkGetImageMemoryRequirements)(VkDevice device, VkImage image, VkMemoryRequirements *pMemoryRequirements);
+typedef void(VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements)(VkDevice device, VkImage image, uint32_t *pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements *pSparseMemoryRequirements);
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling, uint32_t *pPropertyCount, VkSparseImageFormatProperties *pProperties);
+typedef VkResult(VKAPI_PTR *PFN_vkQueueBindSparse)(VkQueue queue, uint32_t bindInfoCount, const VkBindSparseInfo *pBindInfo, VkFence fence);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateFence)(VkDevice device, const VkFenceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkFence *pFence);
+typedef void(VKAPI_PTR *PFN_vkDestroyFence)(VkDevice device, VkFence fence, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkResetFences)(VkDevice device, uint32_t fenceCount, const VkFence *pFences);
+typedef VkResult(VKAPI_PTR *PFN_vkGetFenceStatus)(VkDevice device, VkFence fence);
+typedef VkResult(VKAPI_PTR *PFN_vkWaitForFences)(VkDevice device, uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll, uint64_t timeout);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateSemaphore)(VkDevice device, const VkSemaphoreCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore);
+typedef void(VKAPI_PTR *PFN_vkDestroySemaphore)(VkDevice device, VkSemaphore semaphore, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateEvent)(VkDevice device, const VkEventCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkEvent *pEvent);
+typedef void(VKAPI_PTR *PFN_vkDestroyEvent)(VkDevice device, VkEvent event, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkGetEventStatus)(VkDevice device, VkEvent event);
+typedef VkResult(VKAPI_PTR *PFN_vkSetEvent)(VkDevice device, VkEvent event);
+typedef VkResult(VKAPI_PTR *PFN_vkResetEvent)(VkDevice device, VkEvent event);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateQueryPool)(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool);
+typedef void(VKAPI_PTR *PFN_vkDestroyQueryPool)(VkDevice device, VkQueryPool queryPool, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkGetQueryPoolResults)(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, size_t dataSize, void *pData, VkDeviceSize stride, VkQueryResultFlags flags);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateBuffer)(VkDevice device, const VkBufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer);
+typedef void(VKAPI_PTR *PFN_vkDestroyBuffer)(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateBufferView)(VkDevice device, const VkBufferViewCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkBufferView *pView);
+typedef void(VKAPI_PTR *PFN_vkDestroyBufferView)(VkDevice device, VkBufferView bufferView, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateImage)(VkDevice device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkImage *pImage);
+typedef void(VKAPI_PTR *PFN_vkDestroyImage)(VkDevice device, VkImage image, const VkAllocationCallbacks *pAllocator);
+typedef void(VKAPI_PTR *PFN_vkGetImageSubresourceLayout)(VkDevice device, VkImage image, const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateImageView)(VkDevice device, const VkImageViewCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkImageView *pView);
+typedef void(VKAPI_PTR *PFN_vkDestroyImageView)(VkDevice device, VkImageView imageView, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateShaderModule)(VkDevice device, const VkShaderModuleCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkShaderModule *pShaderModule);
+typedef void(VKAPI_PTR *PFN_vkDestroyShaderModule)(VkDevice device, VkShaderModule shaderModule, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreatePipelineCache)(VkDevice device, const VkPipelineCacheCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipelineCache *pPipelineCache);
+typedef void(VKAPI_PTR *PFN_vkDestroyPipelineCache)(VkDevice device, VkPipelineCache pipelineCache, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkGetPipelineCacheData)(VkDevice device, VkPipelineCache pipelineCache, size_t *pDataSize, void *pData);
+typedef VkResult(VKAPI_PTR *PFN_vkMergePipelineCaches)(VkDevice device, VkPipelineCache dstCache, uint32_t srcCacheCount, const VkPipelineCache *pSrcCaches);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateGraphicsPipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateComputePipelines)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines);
+typedef void(VKAPI_PTR *PFN_vkDestroyPipeline)(VkDevice device, VkPipeline pipeline, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreatePipelineLayout)(VkDevice device, const VkPipelineLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipelineLayout *pPipelineLayout);
+typedef void(VKAPI_PTR *PFN_vkDestroyPipelineLayout)(VkDevice device, VkPipelineLayout pipelineLayout, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateSampler)(VkDevice device, const VkSamplerCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSampler *pSampler);
+typedef void(VKAPI_PTR *PFN_vkDestroySampler)(VkDevice device, VkSampler sampler, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateDescriptorSetLayout)(VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDescriptorSetLayout *pSetLayout);
+typedef void(VKAPI_PTR *PFN_vkDestroyDescriptorSetLayout)(VkDevice device, VkDescriptorSetLayout descriptorSetLayout, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateDescriptorPool)(VkDevice device, const VkDescriptorPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDescriptorPool *pDescriptorPool);
+typedef void(VKAPI_PTR *PFN_vkDestroyDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkResetDescriptorPool)(VkDevice device, VkDescriptorPool descriptorPool, VkDescriptorPoolResetFlags flags);
+typedef VkResult(VKAPI_PTR *PFN_vkAllocateDescriptorSets)(VkDevice device, const VkDescriptorSetAllocateInfo *pAllocateInfo, VkDescriptorSet *pDescriptorSets);
+typedef VkResult(VKAPI_PTR *PFN_vkFreeDescriptorSets)(VkDevice device, VkDescriptorPool descriptorPool, uint32_t descriptorSetCount, const VkDescriptorSet *pDescriptorSets);
+typedef void(VKAPI_PTR *PFN_vkUpdateDescriptorSets)(VkDevice device, uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites, uint32_t descriptorCopyCount, const VkCopyDescriptorSet *pDescriptorCopies);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateFramebuffer)(VkDevice device, const VkFramebufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkFramebuffer *pFramebuffer);
+typedef void(VKAPI_PTR *PFN_vkDestroyFramebuffer)(VkDevice device, VkFramebuffer framebuffer, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateRenderPass)(VkDevice device, const VkRenderPassCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkRenderPass *pRenderPass);
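+
+// Editorial note (illustrative, not part of the upstream Khronos header): the
+// PFN_* typedefs above and below exist so that an embedder can resolve every
+// entry point at runtime through the loader, rather than linking against them.
+// A minimal resolution sketch, assuming a valid `instance`, `physical_device`,
+// `device_info`, and `device`:
+//
+//     PFN_vkCreateDevice create_device = (PFN_vkCreateDevice)
+//         vkGetInstanceProcAddr(instance, "vkCreateDevice");
+//     if (create_device != nullptr) {
+//         create_device(physical_device, &device_info, nullptr, &device);
+//     }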
+typedef void(VKAPI_PTR *PFN_vkDestroyRenderPass)(VkDevice device, VkRenderPass renderPass, const VkAllocationCallbacks *pAllocator); +typedef void(VKAPI_PTR *PFN_vkGetRenderAreaGranularity)(VkDevice device, VkRenderPass renderPass, VkExtent2D *pGranularity); +typedef VkResult(VKAPI_PTR *PFN_vkCreateCommandPool)(VkDevice device, const VkCommandPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkCommandPool *pCommandPool); +typedef void(VKAPI_PTR *PFN_vkDestroyCommandPool)(VkDevice device, VkCommandPool commandPool, const VkAllocationCallbacks *pAllocator); +typedef VkResult(VKAPI_PTR *PFN_vkResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags); +typedef VkResult(VKAPI_PTR *PFN_vkAllocateCommandBuffers)(VkDevice device, const VkCommandBufferAllocateInfo *pAllocateInfo, VkCommandBuffer *pCommandBuffers); +typedef void(VKAPI_PTR *PFN_vkFreeCommandBuffers)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer *pCommandBuffers); +typedef VkResult(VKAPI_PTR *PFN_vkBeginCommandBuffer)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo); +typedef VkResult(VKAPI_PTR *PFN_vkEndCommandBuffer)(VkCommandBuffer commandBuffer); +typedef VkResult(VKAPI_PTR *PFN_vkResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags); +typedef void(VKAPI_PTR *PFN_vkCmdBindPipeline)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline); +typedef void(VKAPI_PTR *PFN_vkCmdSetViewport)(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, const VkViewport *pViewports); +typedef void(VKAPI_PTR *PFN_vkCmdSetScissor)(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount, const VkRect2D *pScissors); +typedef void(VKAPI_PTR *PFN_vkCmdSetLineWidth)(VkCommandBuffer commandBuffer, float lineWidth); +typedef void(VKAPI_PTR *PFN_vkCmdSetDepthBias)(VkCommandBuffer commandBuffer, float depthBiasConstantFactor, float depthBiasClamp, float depthBiasSlopeFactor); +typedef void(VKAPI_PTR *PFN_vkCmdSetBlendConstants)(VkCommandBuffer commandBuffer, const float blendConstants[4]); +typedef void(VKAPI_PTR *PFN_vkCmdSetDepthBounds)(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds); +typedef void(VKAPI_PTR *PFN_vkCmdSetStencilCompareMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t compareMask); +typedef void(VKAPI_PTR *PFN_vkCmdSetStencilWriteMask)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t writeMask); +typedef void(VKAPI_PTR *PFN_vkCmdSetStencilReference)(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, uint32_t reference); +typedef void(VKAPI_PTR *PFN_vkCmdBindDescriptorSets)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount, const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t *pDynamicOffsets); +typedef void(VKAPI_PTR *PFN_vkCmdBindIndexBuffer)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType); +typedef void(VKAPI_PTR *PFN_vkCmdBindVertexBuffers)(VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, const VkBuffer *pBuffers, const VkDeviceSize *pOffsets); +typedef void(VKAPI_PTR *PFN_vkCmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); +typedef void(VKAPI_PTR 
*PFN_vkCmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance); +typedef void(VKAPI_PTR *PFN_vkCmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); +typedef void(VKAPI_PTR *PFN_vkCmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t drawCount, uint32_t stride); +typedef void(VKAPI_PTR *PFN_vkCmdDispatch)(VkCommandBuffer commandBuffer, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ); +typedef void(VKAPI_PTR *PFN_vkCmdDispatchIndirect)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset); +typedef void(VKAPI_PTR *PFN_vkCmdCopyBuffer)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferCopy *pRegions); +typedef void(VKAPI_PTR *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageCopy *pRegions); +typedef void(VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit *pRegions, VkFilter filter); +typedef void(VKAPI_PTR *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy *pRegions); +typedef void(VKAPI_PTR *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy *pRegions); +typedef void(VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const void *pData); +typedef void(VKAPI_PTR *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data); +typedef void(VKAPI_PTR *PFN_vkCmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges); +typedef void(VKAPI_PTR *PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange *pRanges); +typedef void(VKAPI_PTR *PFN_vkCmdClearAttachments)(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments, uint32_t rectCount, const VkClearRect *pRects); +typedef void(VKAPI_PTR *PFN_vkCmdResolveImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageResolve *pRegions); +typedef void(VKAPI_PTR *PFN_vkCmdSetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void(VKAPI_PTR *PFN_vkCmdResetEvent)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags stageMask); +typedef void(VKAPI_PTR *PFN_vkCmdWaitEvents)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers, uint32_t bufferMemoryBarrierCount, 
const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier *pImageMemoryBarriers); +typedef void(VKAPI_PTR *PFN_vkCmdPipelineBarrier)(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, VkDependencyFlags dependencyFlags, uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers, uint32_t bufferMemoryBarrierCount, const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier *pImageMemoryBarriers); +typedef void(VKAPI_PTR *PFN_vkCmdBeginQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags); +typedef void(VKAPI_PTR *PFN_vkCmdEndQuery)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query); +typedef void(VKAPI_PTR *PFN_vkCmdResetQueryPool)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount); +typedef void(VKAPI_PTR *PFN_vkCmdWriteTimestamp)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkQueryPool queryPool, uint32_t query); +typedef void(VKAPI_PTR *PFN_vkCmdCopyQueryPoolResults)(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags); +typedef void(VKAPI_PTR *PFN_vkCmdPushConstants)(VkCommandBuffer commandBuffer, VkPipelineLayout layout, VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size, const void *pValues); +typedef void(VKAPI_PTR *PFN_vkCmdBeginRenderPass)(VkCommandBuffer commandBuffer, const VkRenderPassBeginInfo *pRenderPassBegin, VkSubpassContents contents); +typedef void(VKAPI_PTR *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkSubpassContents contents); +typedef void(VKAPI_PTR *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); +typedef void(VKAPI_PTR *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCommandBuffers); + +// This appears to be exported by the loader +VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance( + const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance); + +// Same as above ... 
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkDestroyInstance(
+    VkInstance instance,
+    const VkAllocationCallbacks *pAllocator);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices(
+    VkInstance instance,
+    uint32_t *pPhysicalDeviceCount,
+    VkPhysicalDevice *pPhysicalDevices);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures(
+    VkPhysicalDevice physicalDevice,
+    VkPhysicalDeviceFeatures *pFeatures);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFormatProperties(
+    VkPhysicalDevice physicalDevice,
+    VkFormat format,
+    VkFormatProperties *pFormatProperties);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceImageFormatProperties(
+    VkPhysicalDevice physicalDevice,
+    VkFormat format,
+    VkImageType type,
+    VkImageTiling tiling,
+    VkImageUsageFlags usage,
+    VkImageCreateFlags flags,
+    VkImageFormatProperties *pImageFormatProperties);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties(
+    VkPhysicalDevice physicalDevice,
+    VkPhysicalDeviceProperties *pProperties);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties(
+    VkPhysicalDevice physicalDevice,
+    uint32_t *pQueueFamilyPropertyCount,
+    VkQueueFamilyProperties *pQueueFamilyProperties);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties(
+    VkPhysicalDevice physicalDevice,
+    VkPhysicalDeviceMemoryProperties *pMemoryProperties);
+
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetDeviceProcAddr(
+    VkDevice device,
+    const char *pName);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateDevice(
+    VkPhysicalDevice physicalDevice,
+    const VkDeviceCreateInfo *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkDevice *pDevice);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroyDevice(
+    VkDevice device,
+    const VkAllocationCallbacks *pAllocator);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceExtensionProperties(
+    const char *pLayerName,
+    uint32_t *pPropertyCount,
+    VkExtensionProperties *pProperties);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceExtensionProperties(
+    VkPhysicalDevice physicalDevice,
+    const char *pLayerName,
+    uint32_t *pPropertyCount,
+    VkExtensionProperties *pProperties);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateInstanceLayerProperties(
+    uint32_t *pPropertyCount,
+    VkLayerProperties *pProperties);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkEnumerateDeviceLayerProperties(
+    VkPhysicalDevice physicalDevice,
+    uint32_t *pPropertyCount,
+    VkLayerProperties *pProperties);
+
+VKAPI_ATTR void VKAPI_CALL vkGetDeviceQueue(
+    VkDevice device,
+    uint32_t queueFamilyIndex,
+    uint32_t queueIndex,
+    VkQueue *pQueue);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
+    VkQueue queue,
+    uint32_t submitCount,
+    const VkSubmitInfo *pSubmits,
+    VkFence fence);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkQueueWaitIdle(
+    VkQueue queue);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkDeviceWaitIdle(
+    VkDevice device);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkAllocateMemory(
+    VkDevice device,
+    const VkMemoryAllocateInfo *pAllocateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkDeviceMemory *pMemory);
+
+VKAPI_ATTR void VKAPI_CALL vkFreeMemory(
+    VkDevice device,
+    VkDeviceMemory memory,
+    const VkAllocationCallbacks *pAllocator);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkMapMemory(
+    VkDevice device,
+    VkDeviceMemory memory,
+    VkDeviceSize offset,
+    VkDeviceSize size,
+    VkMemoryMapFlags flags,
+    void **ppData);
+
+VKAPI_ATTR void 
VKAPI_CALL vkUnmapMemory( + VkDevice device, + VkDeviceMemory memory); + +VKAPI_ATTR VkResult VKAPI_CALL vkFlushMappedMemoryRanges( + VkDevice device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges); + +VKAPI_ATTR VkResult VKAPI_CALL vkInvalidateMappedMemoryRanges( + VkDevice device, + uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges); + +VKAPI_ATTR void VKAPI_CALL vkGetDeviceMemoryCommitment( + VkDevice device, + VkDeviceMemory memory, + VkDeviceSize *pCommittedMemoryInBytes); + +VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory( + VkDevice device, + VkBuffer buffer, + VkDeviceMemory memory, + VkDeviceSize memoryOffset); + +VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory( + VkDevice device, + VkImage image, + VkDeviceMemory memory, + VkDeviceSize memoryOffset); + +VKAPI_ATTR void VKAPI_CALL vkGetBufferMemoryRequirements( + VkDevice device, + VkBuffer buffer, + VkMemoryRequirements *pMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements( + VkDevice device, + VkImage image, + VkMemoryRequirements *pMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements( + VkDevice device, + VkImage image, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements *pSparseMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkImageType type, + VkSampleCountFlagBits samples, + VkImageUsageFlags usage, + VkImageTiling tiling, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties *pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkQueueBindSparse( + VkQueue queue, + uint32_t bindInfoCount, + const VkBindSparseInfo *pBindInfo, + VkFence fence); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateFence( + VkDevice device, + const VkFenceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFence *pFence); + +VKAPI_ATTR void VKAPI_CALL vkDestroyFence( + VkDevice device, + VkFence fence, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetFences( + VkDevice device, + uint32_t fenceCount, + const VkFence *pFences); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceStatus( + VkDevice device, + VkFence fence); + +VKAPI_ATTR VkResult VKAPI_CALL vkWaitForFences( + VkDevice device, + uint32_t fenceCount, + const VkFence *pFences, + VkBool32 waitAll, + uint64_t timeout); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSemaphore( + VkDevice device, + const VkSemaphoreCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSemaphore *pSemaphore); + +VKAPI_ATTR void VKAPI_CALL vkDestroySemaphore( + VkDevice device, + VkSemaphore semaphore, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateEvent( + VkDevice device, + const VkEventCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkEvent *pEvent); + +VKAPI_ATTR void VKAPI_CALL vkDestroyEvent( + VkDevice device, + VkEvent event, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetEventStatus( + VkDevice device, + VkEvent event); + +VKAPI_ATTR VkResult VKAPI_CALL vkSetEvent( + VkDevice device, + VkEvent event); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetEvent( + VkDevice device, + VkEvent event); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateQueryPool( + VkDevice device, + const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkQueryPool *pQueryPool); + +VKAPI_ATTR void VKAPI_CALL 
vkDestroyQueryPool( + VkDevice device, + VkQueryPool queryPool, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetQueryPoolResults( + VkDevice device, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + size_t dataSize, + void *pData, + VkDeviceSize stride, + VkQueryResultFlags flags); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateBuffer( + VkDevice device, + const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBuffer *pBuffer); + +VKAPI_ATTR void VKAPI_CALL vkDestroyBuffer( + VkDevice device, + VkBuffer buffer, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateBufferView( + VkDevice device, + const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pView); + +VKAPI_ATTR void VKAPI_CALL vkDestroyBufferView( + VkDevice device, + VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateImage( + VkDevice device, + const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImage *pImage); + +VKAPI_ATTR void VKAPI_CALL vkDestroyImage( + VkDevice device, + VkImage image, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR void VKAPI_CALL vkGetImageSubresourceLayout( + VkDevice device, + VkImage image, + const VkImageSubresource *pSubresource, + VkSubresourceLayout *pLayout); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateImageView( + VkDevice device, + const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkImageView *pView); + +VKAPI_ATTR void VKAPI_CALL vkDestroyImageView( + VkDevice device, + VkImageView imageView, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateShaderModule( + VkDevice device, + const VkShaderModuleCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkShaderModule *pShaderModule); + +VKAPI_ATTR void VKAPI_CALL vkDestroyShaderModule( + VkDevice device, + VkShaderModule shaderModule, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineCache( + VkDevice device, + const VkPipelineCacheCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineCache *pPipelineCache); + +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineCache( + VkDevice device, + VkPipelineCache pipelineCache, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + size_t *pDataSize, + void *pData); + +VKAPI_ATTR VkResult VKAPI_CALL vkMergePipelineCaches( + VkDevice device, + VkPipelineCache dstCache, + uint32_t srcCacheCount, + const VkPipelineCache *pSrcCaches); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkGraphicsPipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines( + VkDevice device, + VkPipelineCache pipelineCache, + uint32_t createInfoCount, + const VkComputePipelineCreateInfo *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkPipeline *pPipelines); + +VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline( + VkDevice device, + VkPipeline pipeline, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineLayout( + VkDevice device, + const VkPipelineLayoutCreateInfo 
*pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkPipelineLayout *pPipelineLayout); + +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineLayout( + VkDevice device, + VkPipelineLayout pipelineLayout, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSampler( + VkDevice device, + const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSampler *pSampler); + +VKAPI_ATTR void VKAPI_CALL vkDestroySampler( + VkDevice device, + VkSampler sampler, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorSetLayout( + VkDevice device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorSetLayout *pSetLayout); + +VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorSetLayout( + VkDevice device, + VkDescriptorSetLayout descriptorSetLayout, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorPool( + VkDevice device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *pDescriptorPool); + +VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetDescriptorPool( + VkDevice device, + VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags); + +VKAPI_ATTR VkResult VKAPI_CALL vkAllocateDescriptorSets( + VkDevice device, + const VkDescriptorSetAllocateInfo *pAllocateInfo, + VkDescriptorSet *pDescriptorSets); + +VKAPI_ATTR VkResult VKAPI_CALL vkFreeDescriptorSets( + VkDevice device, + VkDescriptorPool descriptorPool, + uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets); + +VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSets( + VkDevice device, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateFramebuffer( + VkDevice device, + const VkFramebufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkFramebuffer *pFramebuffer); + +VKAPI_ATTR void VKAPI_CALL vkDestroyFramebuffer( + VkDevice device, + VkFramebuffer framebuffer, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateRenderPass( + VkDevice device, + const VkRenderPassCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkRenderPass *pRenderPass); + +VKAPI_ATTR void VKAPI_CALL vkDestroyRenderPass( + VkDevice device, + VkRenderPass renderPass, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR void VKAPI_CALL vkGetRenderAreaGranularity( + VkDevice device, + VkRenderPass renderPass, + VkExtent2D *pGranularity); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateCommandPool( + VkDevice device, + const VkCommandPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkCommandPool *pCommandPool); + +VKAPI_ATTR void VKAPI_CALL vkDestroyCommandPool( + VkDevice device, + VkCommandPool commandPool, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandPool( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolResetFlags flags); + +VKAPI_ATTR VkResult VKAPI_CALL vkAllocateCommandBuffers( + VkDevice device, + const VkCommandBufferAllocateInfo *pAllocateInfo, + VkCommandBuffer *pCommandBuffers); + +VKAPI_ATTR void VKAPI_CALL vkFreeCommandBuffers( + VkDevice 
device, + VkCommandPool commandPool, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers); + +VKAPI_ATTR VkResult VKAPI_CALL vkBeginCommandBuffer( + VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo *pBeginInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkEndCommandBuffer( + VkCommandBuffer commandBuffer); + +VKAPI_ATTR VkResult VKAPI_CALL vkResetCommandBuffer( + VkCommandBuffer commandBuffer, + VkCommandBufferResetFlags flags); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindPipeline( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipeline pipeline); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetViewport( + VkCommandBuffer commandBuffer, + uint32_t firstViewport, + uint32_t viewportCount, + const VkViewport *pViewports); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetScissor( + VkCommandBuffer commandBuffer, + uint32_t firstScissor, + uint32_t scissorCount, + const VkRect2D *pScissors); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetLineWidth( + VkCommandBuffer commandBuffer, + float lineWidth); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBias( + VkCommandBuffer commandBuffer, + float depthBiasConstantFactor, + float depthBiasClamp, + float depthBiasSlopeFactor); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetBlendConstants( + VkCommandBuffer commandBuffer, + const float blendConstants[4]); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBounds( + VkCommandBuffer commandBuffer, + float minDepthBounds, + float maxDepthBounds); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilCompareMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t compareMask); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilWriteMask( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t writeMask); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetStencilReference( + VkCommandBuffer commandBuffer, + VkStencilFaceFlags faceMask, + uint32_t reference); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorSets( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + uint32_t firstSet, + uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets, + uint32_t dynamicOffsetCount, + const uint32_t *pDynamicOffsets); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindIndexBuffer( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkIndexType indexType); + +VKAPI_ATTR void VKAPI_CALL vkCmdBindVertexBuffers( + VkCommandBuffer commandBuffer, + uint32_t firstBinding, + uint32_t bindingCount, + const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets); + +VKAPI_ATTR void VKAPI_CALL vkCmdDraw( + VkCommandBuffer commandBuffer, + uint32_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t firstInstance); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexed( + VkCommandBuffer commandBuffer, + uint32_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t firstInstance); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize offset, + uint32_t drawCount, + uint32_t stride); + +VKAPI_ATTR void VKAPI_CALL vkCmdDispatch( + VkCommandBuffer commandBuffer, + uint32_t groupCountX, + uint32_t groupCountY, + uint32_t groupCountZ); + +VKAPI_ATTR void VKAPI_CALL vkCmdDispatchIndirect( + VkCommandBuffer commandBuffer, + VkBuffer buffer, + VkDeviceSize 
offset); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyBuffer( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferCopy *pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageCopy *pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdBlitImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageBlit *pRegions, + VkFilter filter); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyBufferToImage( + VkCommandBuffer commandBuffer, + VkBuffer srcBuffer, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkBufferImageCopy *pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyImageToBuffer( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkBuffer dstBuffer, + uint32_t regionCount, + const VkBufferImageCopy *pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdUpdateBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize dataSize, + const void *pData); + +VKAPI_ATTR void VKAPI_CALL vkCmdFillBuffer( + VkCommandBuffer commandBuffer, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize size, + uint32_t data); + +VKAPI_ATTR void VKAPI_CALL vkCmdClearColorImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearColorValue *pColor, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges); + +VKAPI_ATTR void VKAPI_CALL vkCmdClearDepthStencilImage( + VkCommandBuffer commandBuffer, + VkImage image, + VkImageLayout imageLayout, + const VkClearDepthStencilValue *pDepthStencil, + uint32_t rangeCount, + const VkImageSubresourceRange *pRanges); + +VKAPI_ATTR void VKAPI_CALL vkCmdClearAttachments( + VkCommandBuffer commandBuffer, + uint32_t attachmentCount, + const VkClearAttachment *pAttachments, + uint32_t rectCount, + const VkClearRect *pRects); + +VKAPI_ATTR void VKAPI_CALL vkCmdResolveImage( + VkCommandBuffer commandBuffer, + VkImage srcImage, + VkImageLayout srcImageLayout, + VkImage dstImage, + VkImageLayout dstImageLayout, + uint32_t regionCount, + const VkImageResolve *pRegions); + +VKAPI_ATTR void VKAPI_CALL vkCmdSetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask); + +VKAPI_ATTR void VKAPI_CALL vkCmdResetEvent( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags stageMask); + +VKAPI_ATTR void VKAPI_CALL vkCmdWaitEvents( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent *pEvents, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier *pImageMemoryBarriers); + +VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, + const VkMemoryBarrier *pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier *pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const 
VkImageMemoryBarrier *pImageMemoryBarriers); + +VKAPI_ATTR void VKAPI_CALL vkCmdBeginQuery( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query, + VkQueryControlFlags flags); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndQuery( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t query); + +VKAPI_ATTR void VKAPI_CALL vkCmdResetQueryPool( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount); + +VKAPI_ATTR void VKAPI_CALL vkCmdWriteTimestamp( + VkCommandBuffer commandBuffer, + VkPipelineStageFlagBits pipelineStage, + VkQueryPool queryPool, + uint32_t query); + +VKAPI_ATTR void VKAPI_CALL vkCmdCopyQueryPoolResults( + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, + uint32_t firstQuery, + uint32_t queryCount, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + VkDeviceSize stride, + VkQueryResultFlags flags); + +VKAPI_ATTR void VKAPI_CALL vkCmdPushConstants( + VkCommandBuffer commandBuffer, + VkPipelineLayout layout, + VkShaderStageFlags stageFlags, + uint32_t offset, + uint32_t size, + const void *pValues); + +VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderPass( + VkCommandBuffer commandBuffer, + const VkRenderPassBeginInfo *pRenderPassBegin, + VkSubpassContents contents); + +VKAPI_ATTR void VKAPI_CALL vkCmdNextSubpass( + VkCommandBuffer commandBuffer, + VkSubpassContents contents); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderPass( + VkCommandBuffer commandBuffer); + +VKAPI_ATTR void VKAPI_CALL vkCmdExecuteCommands( + VkCommandBuffer commandBuffer, + uint32_t commandBufferCount, + const VkCommandBuffer *pCommandBuffers); +#endif + +#define VK_KHR_surface 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR) + +#define VK_KHR_SURFACE_SPEC_VERSION 25 +#define VK_KHR_SURFACE_EXTENSION_NAME "VK_KHR_surface" +#define VK_COLORSPACE_SRGB_NONLINEAR_KHR VK_COLOR_SPACE_SRGB_NONLINEAR_KHR + +typedef enum VkColorSpaceKHR { + VK_COLOR_SPACE_SRGB_NONLINEAR_KHR = 0, + VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT = 1000104001, + VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT = 1000104002, + VK_COLOR_SPACE_DCI_P3_LINEAR_EXT = 1000104003, + VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT = 1000104004, + VK_COLOR_SPACE_BT709_LINEAR_EXT = 1000104005, + VK_COLOR_SPACE_BT709_NONLINEAR_EXT = 1000104006, + VK_COLOR_SPACE_BT2020_LINEAR_EXT = 1000104007, + VK_COLOR_SPACE_HDR10_ST2084_EXT = 1000104008, + VK_COLOR_SPACE_DOLBYVISION_EXT = 1000104009, + VK_COLOR_SPACE_HDR10_HLG_EXT = 1000104010, + VK_COLOR_SPACE_ADOBERGB_LINEAR_EXT = 1000104011, + VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT = 1000104012, + VK_COLOR_SPACE_PASS_THROUGH_EXT = 1000104013, + VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT = 1000104014, + VK_COLOR_SPACE_BEGIN_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, + VK_COLOR_SPACE_END_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, + VK_COLOR_SPACE_RANGE_SIZE_KHR = (VK_COLOR_SPACE_SRGB_NONLINEAR_KHR - VK_COLOR_SPACE_SRGB_NONLINEAR_KHR + 1), // NOLINT: misc-redundant-expression + VK_COLOR_SPACE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkColorSpaceKHR; + +typedef enum VkPresentModeKHR { + VK_PRESENT_MODE_IMMEDIATE_KHR = 0, + VK_PRESENT_MODE_MAILBOX_KHR = 1, + VK_PRESENT_MODE_FIFO_KHR = 2, + VK_PRESENT_MODE_FIFO_RELAXED_KHR = 3, + VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR = 1000111000, + VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR = 1000111001, + VK_PRESENT_MODE_BEGIN_RANGE_KHR = VK_PRESENT_MODE_IMMEDIATE_KHR, + VK_PRESENT_MODE_END_RANGE_KHR = VK_PRESENT_MODE_FIFO_RELAXED_KHR, + VK_PRESENT_MODE_RANGE_SIZE_KHR = (VK_PRESENT_MODE_FIFO_RELAXED_KHR - 
VK_PRESENT_MODE_IMMEDIATE_KHR + 1), // NOLINT: misc-redundant-expression + VK_PRESENT_MODE_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPresentModeKHR; + +typedef enum VkSurfaceTransformFlagBitsKHR { + VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR = 0x00000001, + VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR = 0x00000002, + VK_SURFACE_TRANSFORM_ROTATE_180_BIT_KHR = 0x00000004, + VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR = 0x00000008, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_BIT_KHR = 0x00000010, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_90_BIT_KHR = 0x00000020, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_180_BIT_KHR = 0x00000040, + VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_270_BIT_KHR = 0x00000080, + VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR = 0x00000100, + VK_SURFACE_TRANSFORM_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkSurfaceTransformFlagBitsKHR; +typedef VkFlags VkSurfaceTransformFlagsKHR; + +typedef enum VkCompositeAlphaFlagBitsKHR { + VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR = 0x00000001, + VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR = 0x00000002, + VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR = 0x00000004, + VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR = 0x00000008, + VK_COMPOSITE_ALPHA_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkCompositeAlphaFlagBitsKHR; +typedef VkFlags VkCompositeAlphaFlagsKHR; + +typedef struct VkSurfaceCapabilitiesKHR { + uint32_t minImageCount; + uint32_t maxImageCount; + VkExtent2D currentExtent; + VkExtent2D minImageExtent; + VkExtent2D maxImageExtent; + uint32_t maxImageArrayLayers; + VkSurfaceTransformFlagsKHR supportedTransforms; + VkSurfaceTransformFlagBitsKHR currentTransform; + VkCompositeAlphaFlagsKHR supportedCompositeAlpha; + VkImageUsageFlags supportedUsageFlags; +} VkSurfaceCapabilitiesKHR; + +typedef struct VkSurfaceFormatKHR { + VkFormat format; + VkColorSpaceKHR colorSpace; +} VkSurfaceFormatKHR; + +typedef void(VKAPI_PTR *PFN_vkDestroySurfaceKHR)(VkInstance instance, VkSurfaceKHR surface, const VkAllocationCallbacks *pAllocator); +typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceSupportKHR)(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, VkSurfaceKHR surface, VkBool32 *pSupported); +typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilitiesKHR *pSurfaceCapabilities); +typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceFormatsKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t *pSurfaceFormatCount, VkSurfaceFormatKHR *pSurfaceFormats); +typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfacePresentModesKHR)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t *pPresentModeCount, VkPresentModeKHR *pPresentModes); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkDestroySurfaceKHR( + VkInstance instance, + VkSurfaceKHR surface, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceSupportKHR( + VkPhysicalDevice physicalDevice, + uint32_t queueFamilyIndex, + VkSurfaceKHR surface, + VkBool32 *pSupported); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + VkSurfaceCapabilitiesKHR *pSurfaceCapabilities); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormatsKHR( + VkPhysicalDevice physicalDevice, + VkSurfaceKHR surface, + uint32_t *pSurfaceFormatCount, + VkSurfaceFormatKHR *pSurfaceFormats); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR( + VkPhysicalDevice 
physicalDevice, + VkSurfaceKHR surface, + uint32_t *pPresentModeCount, + VkPresentModeKHR *pPresentModes); +#endif + +#define VK_KHR_swapchain 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSwapchainKHR) + +#define VK_KHR_SWAPCHAIN_SPEC_VERSION 68 +#define VK_KHR_SWAPCHAIN_EXTENSION_NAME "VK_KHR_swapchain" + +typedef enum VkSwapchainCreateFlagBitsKHR { + VK_SWAPCHAIN_CREATE_BIND_SFR_BIT_KHX = 0x00000001, + VK_SWAPCHAIN_CREATE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkSwapchainCreateFlagBitsKHR; +typedef VkFlags VkSwapchainCreateFlagsKHR; + +typedef struct VkSwapchainCreateInfoKHR { + VkStructureType sType; + const void *pNext; + VkSwapchainCreateFlagsKHR flags; + VkSurfaceKHR surface; + uint32_t minImageCount; + VkFormat imageFormat; + VkColorSpaceKHR imageColorSpace; + VkExtent2D imageExtent; + uint32_t imageArrayLayers; + VkImageUsageFlags imageUsage; + VkSharingMode imageSharingMode; + uint32_t queueFamilyIndexCount; + const uint32_t *pQueueFamilyIndices; + VkSurfaceTransformFlagBitsKHR preTransform; + VkCompositeAlphaFlagBitsKHR compositeAlpha; + VkPresentModeKHR presentMode; + VkBool32 clipped; + VkSwapchainKHR oldSwapchain; +} VkSwapchainCreateInfoKHR; + +typedef struct VkPresentInfoKHR { + VkStructureType sType; + const void *pNext; + uint32_t waitSemaphoreCount; + const VkSemaphore *pWaitSemaphores; + uint32_t swapchainCount; + const VkSwapchainKHR *pSwapchains; + const uint32_t *pImageIndices; + VkResult *pResults; +} VkPresentInfoKHR; + +typedef VkResult(VKAPI_PTR *PFN_vkCreateSwapchainKHR)(VkDevice device, const VkSwapchainCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSwapchainKHR *pSwapchain); +typedef void(VKAPI_PTR *PFN_vkDestroySwapchainKHR)(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks *pAllocator); +typedef VkResult(VKAPI_PTR *PFN_vkGetSwapchainImagesKHR)(VkDevice device, VkSwapchainKHR swapchain, uint32_t *pSwapchainImageCount, VkImage *pSwapchainImages); +typedef VkResult(VKAPI_PTR *PFN_vkAcquireNextImageKHR)(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t *pImageIndex); +typedef VkResult(VKAPI_PTR *PFN_vkQueuePresentKHR)(VkQueue queue, const VkPresentInfoKHR *pPresentInfo); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSwapchainKHR( + VkDevice device, + const VkSwapchainCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSwapchainKHR *pSwapchain); + +VKAPI_ATTR void VKAPI_CALL vkDestroySwapchainKHR( + VkDevice device, + VkSwapchainKHR swapchain, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainImagesKHR( + VkDevice device, + VkSwapchainKHR swapchain, + uint32_t *pSwapchainImageCount, + VkImage *pSwapchainImages); + +VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImageKHR( + VkDevice device, + VkSwapchainKHR swapchain, + uint64_t timeout, + VkSemaphore semaphore, + VkFence fence, + uint32_t *pImageIndex); + +VKAPI_ATTR VkResult VKAPI_CALL vkQueuePresentKHR( + VkQueue queue, + const VkPresentInfoKHR *pPresentInfo); +#endif + +#define VK_KHR_display 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayKHR) +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayModeKHR) + +#define VK_KHR_DISPLAY_SPEC_VERSION 21 +#define VK_KHR_DISPLAY_EXTENSION_NAME "VK_KHR_display" + +typedef enum VkDisplayPlaneAlphaFlagBitsKHR { + VK_DISPLAY_PLANE_ALPHA_OPAQUE_BIT_KHR = 0x00000001, + VK_DISPLAY_PLANE_ALPHA_GLOBAL_BIT_KHR = 0x00000002, + VK_DISPLAY_PLANE_ALPHA_PER_PIXEL_BIT_KHR = 0x00000004, + 
VK_DISPLAY_PLANE_ALPHA_PER_PIXEL_PREMULTIPLIED_BIT_KHR = 0x00000008, + VK_DISPLAY_PLANE_ALPHA_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkDisplayPlaneAlphaFlagBitsKHR; +typedef VkFlags VkDisplayPlaneAlphaFlagsKHR; +typedef VkFlags VkDisplayModeCreateFlagsKHR; +typedef VkFlags VkDisplaySurfaceCreateFlagsKHR; + +typedef struct VkDisplayPropertiesKHR { + VkDisplayKHR display; + const char *displayName; + VkExtent2D physicalDimensions; + VkExtent2D physicalResolution; + VkSurfaceTransformFlagsKHR supportedTransforms; + VkBool32 planeReorderPossible; + VkBool32 persistentContent; +} VkDisplayPropertiesKHR; + +typedef struct VkDisplayModeParametersKHR { + VkExtent2D visibleRegion; + uint32_t refreshRate; +} VkDisplayModeParametersKHR; + +typedef struct VkDisplayModePropertiesKHR { + VkDisplayModeKHR displayMode; + VkDisplayModeParametersKHR parameters; +} VkDisplayModePropertiesKHR; + +typedef struct VkDisplayModeCreateInfoKHR { + VkStructureType sType; + const void *pNext; + VkDisplayModeCreateFlagsKHR flags; + VkDisplayModeParametersKHR parameters; +} VkDisplayModeCreateInfoKHR; + +typedef struct VkDisplayPlaneCapabilitiesKHR { + VkDisplayPlaneAlphaFlagsKHR supportedAlpha; + VkOffset2D minSrcPosition; + VkOffset2D maxSrcPosition; + VkExtent2D minSrcExtent; + VkExtent2D maxSrcExtent; + VkOffset2D minDstPosition; + VkOffset2D maxDstPosition; + VkExtent2D minDstExtent; + VkExtent2D maxDstExtent; +} VkDisplayPlaneCapabilitiesKHR; + +typedef struct VkDisplayPlanePropertiesKHR { + VkDisplayKHR currentDisplay; + uint32_t currentStackIndex; +} VkDisplayPlanePropertiesKHR; + +typedef struct VkDisplaySurfaceCreateInfoKHR { + VkStructureType sType; + const void *pNext; + VkDisplaySurfaceCreateFlagsKHR flags; + VkDisplayModeKHR displayMode; + uint32_t planeIndex; + uint32_t planeStackIndex; + VkSurfaceTransformFlagBitsKHR transform; + float globalAlpha; + VkDisplayPlaneAlphaFlagBitsKHR alphaMode; + VkExtent2D imageExtent; +} VkDisplaySurfaceCreateInfoKHR; + +typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount, VkDisplayPropertiesKHR *pProperties); +typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPlanePropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount, VkDisplayPlanePropertiesKHR *pProperties); +typedef VkResult(VKAPI_PTR *PFN_vkGetDisplayPlaneSupportedDisplaysKHR)(VkPhysicalDevice physicalDevice, uint32_t planeIndex, uint32_t *pDisplayCount, VkDisplayKHR *pDisplays); +typedef VkResult(VKAPI_PTR *PFN_vkGetDisplayModePropertiesKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t *pPropertyCount, VkDisplayModePropertiesKHR *pProperties); +typedef VkResult(VKAPI_PTR *PFN_vkCreateDisplayModeKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDisplayModeKHR *pMode); +typedef VkResult(VKAPI_PTR *PFN_vkGetDisplayPlaneCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode, uint32_t planeIndex, VkDisplayPlaneCapabilitiesKHR *pCapabilities); +typedef VkResult(VKAPI_PTR *PFN_vkCreateDisplayPlaneSurfaceKHR)(VkInstance instance, const VkDisplaySurfaceCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceDisplayPropertiesKHR( + VkPhysicalDevice physicalDevice, + uint32_t *pPropertyCount, + VkDisplayPropertiesKHR *pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL 
vkGetPhysicalDeviceDisplayPlanePropertiesKHR( + VkPhysicalDevice physicalDevice, + uint32_t *pPropertyCount, + VkDisplayPlanePropertiesKHR *pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayPlaneSupportedDisplaysKHR( + VkPhysicalDevice physicalDevice, + uint32_t planeIndex, + uint32_t *pDisplayCount, + VkDisplayKHR *pDisplays); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayModePropertiesKHR( + VkPhysicalDevice physicalDevice, + VkDisplayKHR display, + uint32_t *pPropertyCount, + VkDisplayModePropertiesKHR *pProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDisplayModeKHR( + VkPhysicalDevice physicalDevice, + VkDisplayKHR display, + const VkDisplayModeCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDisplayModeKHR *pMode); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetDisplayPlaneCapabilitiesKHR( + VkPhysicalDevice physicalDevice, + VkDisplayModeKHR mode, + uint32_t planeIndex, + VkDisplayPlaneCapabilitiesKHR *pCapabilities); + +VKAPI_ATTR VkResult VKAPI_CALL vkCreateDisplayPlaneSurfaceKHR( + VkInstance instance, + const VkDisplaySurfaceCreateInfoKHR *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkSurfaceKHR *pSurface); +#endif + +#define VK_KHR_display_swapchain 1 +#define VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION 9 +#define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME "VK_KHR_display_swapchain" + +typedef struct VkDisplayPresentInfoKHR { + VkStructureType sType; + const void *pNext; + VkRect2D srcRect; + VkRect2D dstRect; + VkBool32 persistent; +} VkDisplayPresentInfoKHR; + +typedef VkResult(VKAPI_PTR *PFN_vkCreateSharedSwapchainsKHR)(VkDevice device, uint32_t swapchainCount, const VkSwapchainCreateInfoKHR *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkSwapchainKHR *pSwapchains); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateSharedSwapchainsKHR( + VkDevice device, + uint32_t swapchainCount, + const VkSwapchainCreateInfoKHR *pCreateInfos, + const VkAllocationCallbacks *pAllocator, + VkSwapchainKHR *pSwapchains); +#endif + +#define VK_KHR_sampler_mirror_clamp_to_edge 1 +#define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_SPEC_VERSION 1 +#define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME "VK_KHR_sampler_mirror_clamp_to_edge" + +#define VK_KHR_get_physical_device_properties2 1 +#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_SPEC_VERSION 1 +#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME "VK_KHR_get_physical_device_properties2" + +typedef struct VkPhysicalDeviceFeatures2KHR { + VkStructureType sType; + void *pNext; + VkPhysicalDeviceFeatures features; +} VkPhysicalDeviceFeatures2KHR; + +typedef struct VkPhysicalDeviceProperties2KHR { + VkStructureType sType; + void *pNext; + VkPhysicalDeviceProperties properties; +} VkPhysicalDeviceProperties2KHR; + +typedef struct VkFormatProperties2KHR { + VkStructureType sType; + void *pNext; + VkFormatProperties formatProperties; +} VkFormatProperties2KHR; + +typedef struct VkImageFormatProperties2KHR { + VkStructureType sType; + void *pNext; + VkImageFormatProperties imageFormatProperties; +} VkImageFormatProperties2KHR; + +typedef struct VkPhysicalDeviceImageFormatInfo2KHR { + VkStructureType sType; + const void *pNext; + VkFormat format; + VkImageType type; + VkImageTiling tiling; + VkImageUsageFlags usage; + VkImageCreateFlags flags; +} VkPhysicalDeviceImageFormatInfo2KHR; + +typedef struct VkQueueFamilyProperties2KHR { + VkStructureType sType; + void *pNext; + VkQueueFamilyProperties queueFamilyProperties; +} VkQueueFamilyProperties2KHR; + +typedef struct 
VkPhysicalDeviceMemoryProperties2KHR { + VkStructureType sType; + void *pNext; + VkPhysicalDeviceMemoryProperties memoryProperties; +} VkPhysicalDeviceMemoryProperties2KHR; + +typedef struct VkSparseImageFormatProperties2KHR { + VkStructureType sType; + void *pNext; + VkSparseImageFormatProperties properties; +} VkSparseImageFormatProperties2KHR; + +typedef struct VkPhysicalDeviceSparseImageFormatInfo2KHR { + VkStructureType sType; + const void *pNext; + VkFormat format; + VkImageType type; + VkSampleCountFlagBits samples; + VkImageUsageFlags usage; + VkImageTiling tiling; +} VkPhysicalDeviceSparseImageFormatInfo2KHR; + +typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceFeatures2KHR)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures2KHR *pFeatures); +typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceProperties2KHR)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceProperties2KHR *pProperties); +typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceFormatProperties2KHR)(VkPhysicalDevice physicalDevice, VkFormat format, VkFormatProperties2KHR *pFormatProperties); +typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceImageFormatProperties2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceImageFormatInfo2KHR *pImageFormatInfo, VkImageFormatProperties2KHR *pImageFormatProperties); +typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceQueueFamilyProperties2KHR)(VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, VkQueueFamilyProperties2KHR *pQueueFamilyProperties); +typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceMemoryProperties2KHR)(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties); +typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceSparseImageFormatProperties2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2KHR *pFormatInfo, uint32_t *pPropertyCount, VkSparseImageFormatProperties2KHR *pProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFeatures2KHR( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2KHR *pFeatures); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceProperties2KHR( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceProperties2KHR *pProperties); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceFormatProperties2KHR( + VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties2KHR *pFormatProperties); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceImageFormatProperties2KHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceImageFormatInfo2KHR *pImageFormatInfo, + VkImageFormatProperties2KHR *pImageFormatProperties); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceQueueFamilyProperties2KHR( + VkPhysicalDevice physicalDevice, + uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties2KHR *pQueueFamilyProperties); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMemoryProperties2KHR( + VkPhysicalDevice physicalDevice, + VkPhysicalDeviceMemoryProperties2KHR *pMemoryProperties); + +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties2KHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2KHR *pFormatInfo, + uint32_t *pPropertyCount, + VkSparseImageFormatProperties2KHR *pProperties); +#endif + +#define VK_KHR_shader_draw_parameters 1 +#define VK_KHR_SHADER_DRAW_PARAMETERS_SPEC_VERSION 1 +#define VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME "VK_KHR_shader_draw_parameters" + +#define VK_KHR_maintenance1 1 +#define VK_KHR_MAINTENANCE1_SPEC_VERSION 1 +#define 
VK_KHR_MAINTENANCE1_EXTENSION_NAME "VK_KHR_maintenance1" + +typedef VkFlags VkCommandPoolTrimFlagsKHR; + +typedef void(VKAPI_PTR *PFN_vkTrimCommandPoolKHR)(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlagsKHR flags); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkTrimCommandPoolKHR( + VkDevice device, + VkCommandPool commandPool, + VkCommandPoolTrimFlagsKHR flags); +#endif + +#define VK_KHR_external_memory_capabilities 1 +#define VK_LUID_SIZE_KHR 8 +#define VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_SPEC_VERSION 1 +#define VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME "VK_KHR_external_memory_capabilities" + +typedef enum VkExternalMemoryHandleTypeFlagBitsKHR { + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR = 0x00000001, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR = 0x00000002, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR = 0x00000004, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT_KHR = 0x00000008, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_KMT_BIT_KHR = 0x00000010, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT_KHR = 0x00000020, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT_KHR = 0x00000040, + VK_EXTERNAL_MEMORY_HANDLE_TYPE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkExternalMemoryHandleTypeFlagBitsKHR; +typedef VkFlags VkExternalMemoryHandleTypeFlagsKHR; + +typedef enum VkExternalMemoryFeatureFlagBitsKHR { + VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_KHR = 0x00000001, + VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHR = 0x00000002, + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHR = 0x00000004, + VK_EXTERNAL_MEMORY_FEATURE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkExternalMemoryFeatureFlagBitsKHR; +typedef VkFlags VkExternalMemoryFeatureFlagsKHR; + +typedef struct VkExternalMemoryPropertiesKHR { + VkExternalMemoryFeatureFlagsKHR externalMemoryFeatures; + VkExternalMemoryHandleTypeFlagsKHR exportFromImportedHandleTypes; + VkExternalMemoryHandleTypeFlagsKHR compatibleHandleTypes; +} VkExternalMemoryPropertiesKHR; + +typedef struct VkPhysicalDeviceExternalImageFormatInfoKHR { + VkStructureType sType; + const void *pNext; + VkExternalMemoryHandleTypeFlagBitsKHR handleType; +} VkPhysicalDeviceExternalImageFormatInfoKHR; + +typedef struct VkExternalImageFormatPropertiesKHR { + VkStructureType sType; + void *pNext; + VkExternalMemoryPropertiesKHR externalMemoryProperties; +} VkExternalImageFormatPropertiesKHR; + +typedef struct VkPhysicalDeviceExternalBufferInfoKHR { + VkStructureType sType; + const void *pNext; + VkBufferCreateFlags flags; + VkBufferUsageFlags usage; + VkExternalMemoryHandleTypeFlagBitsKHR handleType; +} VkPhysicalDeviceExternalBufferInfoKHR; + +typedef struct VkExternalBufferPropertiesKHR { + VkStructureType sType; + void *pNext; + VkExternalMemoryPropertiesKHR externalMemoryProperties; +} VkExternalBufferPropertiesKHR; + +typedef struct VkPhysicalDeviceIDPropertiesKHR { + VkStructureType sType; + void *pNext; + uint8_t deviceUUID[VK_UUID_SIZE]; + uint8_t driverUUID[VK_UUID_SIZE]; + uint8_t deviceLUID[VK_LUID_SIZE_KHR]; + uint32_t deviceNodeMask; + VkBool32 deviceLUIDValid; +} VkPhysicalDeviceIDPropertiesKHR; + +typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalBufferPropertiesKHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfoKHR *pExternalBufferInfo, VkExternalBufferPropertiesKHR *pExternalBufferProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalBufferPropertiesKHR( + VkPhysicalDevice physicalDevice, + const 
VkPhysicalDeviceExternalBufferInfoKHR *pExternalBufferInfo, + VkExternalBufferPropertiesKHR *pExternalBufferProperties); +#endif + +#define VK_KHR_external_memory 1 +#define VK_KHR_EXTERNAL_MEMORY_SPEC_VERSION 1 +#define VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME "VK_KHR_external_memory" +#define VK_QUEUE_FAMILY_EXTERNAL_KHR (~0U - 1) + +typedef struct VkExternalMemoryImageCreateInfoKHR { + VkStructureType sType; + const void *pNext; + VkExternalMemoryHandleTypeFlagsKHR handleTypes; +} VkExternalMemoryImageCreateInfoKHR; + +typedef struct VkExternalMemoryBufferCreateInfoKHR { + VkStructureType sType; + const void *pNext; + VkExternalMemoryHandleTypeFlagsKHR handleTypes; +} VkExternalMemoryBufferCreateInfoKHR; + +typedef struct VkExportMemoryAllocateInfoKHR { + VkStructureType sType; + const void *pNext; + VkExternalMemoryHandleTypeFlagsKHR handleTypes; +} VkExportMemoryAllocateInfoKHR; + +#define VK_KHR_external_memory_fd 1 +#define VK_KHR_EXTERNAL_MEMORY_FD_SPEC_VERSION 1 +#define VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME "VK_KHR_external_memory_fd" + +typedef struct VkImportMemoryFdInfoKHR { + VkStructureType sType; + const void *pNext; + VkExternalMemoryHandleTypeFlagBitsKHR handleType; + int fd; +} VkImportMemoryFdInfoKHR; + +typedef struct VkMemoryFdPropertiesKHR { + VkStructureType sType; + void *pNext; + uint32_t memoryTypeBits; +} VkMemoryFdPropertiesKHR; + +typedef struct VkMemoryGetFdInfoKHR { + VkStructureType sType; + const void *pNext; + VkDeviceMemory memory; + VkExternalMemoryHandleTypeFlagBitsKHR handleType; +} VkMemoryGetFdInfoKHR; + +typedef VkResult(VKAPI_PTR *PFN_vkGetMemoryFdKHR)(VkDevice device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFd); +typedef VkResult(VKAPI_PTR *PFN_vkGetMemoryFdPropertiesKHR)(VkDevice device, VkExternalMemoryHandleTypeFlagBitsKHR handleType, int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryFdKHR( + VkDevice device, + const VkMemoryGetFdInfoKHR *pGetFdInfo, + int *pFd); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryFdPropertiesKHR( + VkDevice device, + VkExternalMemoryHandleTypeFlagBitsKHR handleType, + int fd, + VkMemoryFdPropertiesKHR *pMemoryFdProperties); +#endif + +#define VK_KHR_external_semaphore_capabilities 1 +#define VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_SPEC_VERSION 1 +#define VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME "VK_KHR_external_semaphore_capabilities" + +typedef enum VkExternalSemaphoreHandleTypeFlagBitsKHR { + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR = 0x00000001, + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR = 0x00000002, + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR = 0x00000004, + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE_BIT_KHR = 0x00000008, + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR = 0x00000010, + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkExternalSemaphoreHandleTypeFlagBitsKHR; +typedef VkFlags VkExternalSemaphoreHandleTypeFlagsKHR; + +typedef enum VkExternalSemaphoreFeatureFlagBitsKHR { + VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR = 0x00000001, + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR = 0x00000002, + VK_EXTERNAL_SEMAPHORE_FEATURE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkExternalSemaphoreFeatureFlagBitsKHR; +typedef VkFlags VkExternalSemaphoreFeatureFlagsKHR; + +typedef struct VkPhysicalDeviceExternalSemaphoreInfoKHR { + VkStructureType sType; + const void *pNext; + VkExternalSemaphoreHandleTypeFlagBitsKHR handleType; +} 
VkPhysicalDeviceExternalSemaphoreInfoKHR; + +typedef struct VkExternalSemaphorePropertiesKHR { + VkStructureType sType; + void *pNext; + VkExternalSemaphoreHandleTypeFlagsKHR exportFromImportedHandleTypes; + VkExternalSemaphoreHandleTypeFlagsKHR compatibleHandleTypes; + VkExternalSemaphoreFeatureFlagsKHR externalSemaphoreFeatures; +} VkExternalSemaphorePropertiesKHR; + +typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalSemaphorePropertiesKHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalSemaphoreInfoKHR *pExternalSemaphoreInfo, VkExternalSemaphorePropertiesKHR *pExternalSemaphoreProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalSemaphorePropertiesKHR( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalSemaphoreInfoKHR *pExternalSemaphoreInfo, + VkExternalSemaphorePropertiesKHR *pExternalSemaphoreProperties); +#endif + +#define VK_KHR_external_semaphore 1 +#define VK_KHR_EXTERNAL_SEMAPHORE_SPEC_VERSION 1 +#define VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME "VK_KHR_external_semaphore" + +typedef enum VkSemaphoreImportFlagBitsKHR { + VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR = 0x00000001, + VK_SEMAPHORE_IMPORT_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkSemaphoreImportFlagBitsKHR; +typedef VkFlags VkSemaphoreImportFlagsKHR; + +typedef struct VkExportSemaphoreCreateInfoKHR { + VkStructureType sType; + const void *pNext; + VkExternalSemaphoreHandleTypeFlagsKHR handleTypes; +} VkExportSemaphoreCreateInfoKHR; + +#define VK_KHR_external_semaphore_fd 1 +#define VK_KHR_EXTERNAL_SEMAPHORE_FD_SPEC_VERSION 1 +#define VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME "VK_KHR_external_semaphore_fd" + +typedef struct VkImportSemaphoreFdInfoKHR { + VkStructureType sType; + const void *pNext; + VkSemaphore semaphore; + VkSemaphoreImportFlagsKHR flags; + VkExternalSemaphoreHandleTypeFlagBitsKHR handleType; + int fd; +} VkImportSemaphoreFdInfoKHR; + +typedef struct VkSemaphoreGetFdInfoKHR { + VkStructureType sType; + const void *pNext; + VkSemaphore semaphore; + VkExternalSemaphoreHandleTypeFlagBitsKHR handleType; +} VkSemaphoreGetFdInfoKHR; + +typedef VkResult(VKAPI_PTR *PFN_vkImportSemaphoreFdKHR)(VkDevice device, const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo); +typedef VkResult(VKAPI_PTR *PFN_vkGetSemaphoreFdKHR)(VkDevice device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkImportSemaphoreFdKHR( + VkDevice device, + const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreFdKHR( + VkDevice device, + const VkSemaphoreGetFdInfoKHR *pGetFdInfo, + int *pFd); +#endif + +#define VK_KHR_push_descriptor 1 +#define VK_KHR_PUSH_DESCRIPTOR_SPEC_VERSION 1 +#define VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME "VK_KHR_push_descriptor" + +typedef struct VkPhysicalDevicePushDescriptorPropertiesKHR { + VkStructureType sType; + void *pNext; + uint32_t maxPushDescriptors; +} VkPhysicalDevicePushDescriptorPropertiesKHR; + +typedef void(VKAPI_PTR *PFN_vkCmdPushDescriptorSetKHR)(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t set, uint32_t descriptorWriteCount, const VkWriteDescriptorSet *pDescriptorWrites); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdPushDescriptorSetKHR( + VkCommandBuffer commandBuffer, + VkPipelineBindPoint pipelineBindPoint, + VkPipelineLayout layout, + uint32_t set, + uint32_t descriptorWriteCount, + const VkWriteDescriptorSet 
*pDescriptorWrites);
+#endif
+
+#define VK_KHR_16bit_storage 1
+#define VK_KHR_16BIT_STORAGE_SPEC_VERSION 1
+#define VK_KHR_16BIT_STORAGE_EXTENSION_NAME "VK_KHR_16bit_storage"
+
+typedef struct VkPhysicalDevice16BitStorageFeaturesKHR {
+    VkStructureType sType;
+    void *pNext;
+    VkBool32 storageBuffer16BitAccess;
+    VkBool32 uniformAndStorageBuffer16BitAccess;
+    VkBool32 storagePushConstant16;
+    VkBool32 storageInputOutput16;
+} VkPhysicalDevice16BitStorageFeaturesKHR;
+
+#define VK_KHR_incremental_present 1
+#define VK_KHR_INCREMENTAL_PRESENT_SPEC_VERSION 1
+#define VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME "VK_KHR_incremental_present"
+
+typedef struct VkRectLayerKHR {
+    VkOffset2D offset;
+    VkExtent2D extent;
+    uint32_t layer;
+} VkRectLayerKHR;
+
+typedef struct VkPresentRegionKHR {
+    uint32_t rectangleCount;
+    const VkRectLayerKHR *pRectangles;
+} VkPresentRegionKHR;
+
+typedef struct VkPresentRegionsKHR {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t swapchainCount;
+    const VkPresentRegionKHR *pRegions;
+} VkPresentRegionsKHR;
+
+#define VK_KHR_descriptor_update_template 1
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDescriptorUpdateTemplateKHR)
+
+#define VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_SPEC_VERSION 1
+#define VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME "VK_KHR_descriptor_update_template"
+
+typedef enum VkDescriptorUpdateTemplateTypeKHR {
+    VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR = 0,
+    VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR = 1,
+    VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_BEGIN_RANGE_KHR = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
+    VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_END_RANGE_KHR = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR,
+    VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_RANGE_SIZE_KHR = (VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR - VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR + 1),  // NOLINT: misc-redundant-expression
+    VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkDescriptorUpdateTemplateTypeKHR;
+
+typedef VkFlags VkDescriptorUpdateTemplateCreateFlagsKHR;
+
+typedef struct VkDescriptorUpdateTemplateEntryKHR {
+    uint32_t dstBinding;
+    uint32_t dstArrayElement;
+    uint32_t descriptorCount;
+    VkDescriptorType descriptorType;
+    size_t offset;
+    size_t stride;
+} VkDescriptorUpdateTemplateEntryKHR;
+
+typedef struct VkDescriptorUpdateTemplateCreateInfoKHR {
+    VkStructureType sType;
+    void *pNext;
+    VkDescriptorUpdateTemplateCreateFlagsKHR flags;
+    uint32_t descriptorUpdateEntryCount;
+    const VkDescriptorUpdateTemplateEntryKHR *pDescriptorUpdateEntries;
+    VkDescriptorUpdateTemplateTypeKHR templateType;
+    VkDescriptorSetLayout descriptorSetLayout;
+    VkPipelineBindPoint pipelineBindPoint;
+    VkPipelineLayout pipelineLayout;
+    uint32_t set;
+} VkDescriptorUpdateTemplateCreateInfoKHR;
+
+typedef VkResult(VKAPI_PTR *PFN_vkCreateDescriptorUpdateTemplateKHR)(VkDevice device, const VkDescriptorUpdateTemplateCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDescriptorUpdateTemplateKHR *pDescriptorUpdateTemplate);
+typedef void(VKAPI_PTR *PFN_vkDestroyDescriptorUpdateTemplateKHR)(VkDevice device, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const VkAllocationCallbacks *pAllocator);
+typedef void(VKAPI_PTR *PFN_vkUpdateDescriptorSetWithTemplateKHR)(VkDevice device, VkDescriptorSet descriptorSet, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, const void *pData);
+typedef void(VKAPI_PTR *PFN_vkCmdPushDescriptorSetWithTemplateKHR)(VkCommandBuffer commandBuffer, VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate, VkPipelineLayout layout, uint32_t set, const void *pData);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateDescriptorUpdateTemplateKHR(
+    VkDevice device,
+    const VkDescriptorUpdateTemplateCreateInfoKHR *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkDescriptorUpdateTemplateKHR *pDescriptorUpdateTemplate);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroyDescriptorUpdateTemplateKHR(
+    VkDevice device,
+    VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
+    const VkAllocationCallbacks *pAllocator);
+
+VKAPI_ATTR void VKAPI_CALL vkUpdateDescriptorSetWithTemplateKHR(
+    VkDevice device,
+    VkDescriptorSet descriptorSet,
+    VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
+    const void *pData);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdPushDescriptorSetWithTemplateKHR(
+    VkCommandBuffer commandBuffer,
+    VkDescriptorUpdateTemplateKHR descriptorUpdateTemplate,
+    VkPipelineLayout layout,
+    uint32_t set,
+    const void *pData);
+#endif
+
+#define VK_KHR_shared_presentable_image 1
+#define VK_KHR_SHARED_PRESENTABLE_IMAGE_SPEC_VERSION 1
+#define VK_KHR_SHARED_PRESENTABLE_IMAGE_EXTENSION_NAME "VK_KHR_shared_presentable_image"
+
+typedef struct VkSharedPresentSurfaceCapabilitiesKHR {
+    VkStructureType sType;
+    void *pNext;
+    VkImageUsageFlags sharedPresentSupportedUsageFlags;
+} VkSharedPresentSurfaceCapabilitiesKHR;
+
+typedef VkResult(VKAPI_PTR *PFN_vkGetSwapchainStatusKHR)(VkDevice device, VkSwapchainKHR swapchain);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainStatusKHR(
+    VkDevice device,
+    VkSwapchainKHR swapchain);
+#endif
+
+#define VK_KHR_external_fence_capabilities 1
+#define VK_KHR_EXTERNAL_FENCE_CAPABILITIES_SPEC_VERSION 1
+#define VK_KHR_EXTERNAL_FENCE_CAPABILITIES_EXTENSION_NAME "VK_KHR_external_fence_capabilities"
+
+typedef enum VkExternalFenceHandleTypeFlagBitsKHR {
+    VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR = 0x00000001,
+    VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR = 0x00000002,
+    VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR = 0x00000004,
+    VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR = 0x00000008,
+    VK_EXTERNAL_FENCE_HANDLE_TYPE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkExternalFenceHandleTypeFlagBitsKHR;
+typedef VkFlags VkExternalFenceHandleTypeFlagsKHR;
+
+typedef enum VkExternalFenceFeatureFlagBitsKHR {
+    VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR = 0x00000001,
+    VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT_KHR = 0x00000002,
+    VK_EXTERNAL_FENCE_FEATURE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkExternalFenceFeatureFlagBitsKHR;
+typedef VkFlags VkExternalFenceFeatureFlagsKHR;
+
+typedef struct VkPhysicalDeviceExternalFenceInfoKHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkExternalFenceHandleTypeFlagBitsKHR handleType;
+} VkPhysicalDeviceExternalFenceInfoKHR;
+
+typedef struct VkExternalFencePropertiesKHR {
+    VkStructureType sType;
+    void *pNext;
+    VkExternalFenceHandleTypeFlagsKHR exportFromImportedHandleTypes;
+    VkExternalFenceHandleTypeFlagsKHR compatibleHandleTypes;
+    VkExternalFenceFeatureFlagsKHR externalFenceFeatures;
+} VkExternalFencePropertiesKHR;
+
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalFencePropertiesKHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalFenceInfoKHR *pExternalFenceInfo, VkExternalFencePropertiesKHR *pExternalFenceProperties);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceExternalFencePropertiesKHR(
+    VkPhysicalDevice physicalDevice,
+    const VkPhysicalDeviceExternalFenceInfoKHR *pExternalFenceInfo,
+    VkExternalFencePropertiesKHR *pExternalFenceProperties);
+#endif
+
+#define VK_KHR_external_fence 1
+#define VK_KHR_EXTERNAL_FENCE_SPEC_VERSION 1
+#define VK_KHR_EXTERNAL_FENCE_EXTENSION_NAME "VK_KHR_external_fence"
+
+typedef enum VkFenceImportFlagBitsKHR {
+    VK_FENCE_IMPORT_TEMPORARY_BIT_KHR = 0x00000001,
+    VK_FENCE_IMPORT_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF
+} VkFenceImportFlagBitsKHR;
+typedef VkFlags VkFenceImportFlagsKHR;
+
+typedef struct VkExportFenceCreateInfoKHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkExternalFenceHandleTypeFlagsKHR handleTypes;
+} VkExportFenceCreateInfoKHR;
+
+#define VK_KHR_external_fence_fd 1
+#define VK_KHR_EXTERNAL_FENCE_FD_SPEC_VERSION 1
+#define VK_KHR_EXTERNAL_FENCE_FD_EXTENSION_NAME "VK_KHR_external_fence_fd"
+
+typedef struct VkImportFenceFdInfoKHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkFence fence;
+    VkFenceImportFlagsKHR flags;
+    VkExternalFenceHandleTypeFlagBitsKHR handleType;
+    int fd;
+} VkImportFenceFdInfoKHR;
+
+typedef struct VkFenceGetFdInfoKHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkFence fence;
+    VkExternalFenceHandleTypeFlagBitsKHR handleType;
+} VkFenceGetFdInfoKHR;
+
+typedef VkResult(VKAPI_PTR *PFN_vkImportFenceFdKHR)(VkDevice device, const VkImportFenceFdInfoKHR *pImportFenceFdInfo);
+typedef VkResult(VKAPI_PTR *PFN_vkGetFenceFdKHR)(VkDevice device, const VkFenceGetFdInfoKHR *pGetFdInfo, int *pFd);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkImportFenceFdKHR(
+    VkDevice device,
+    const VkImportFenceFdInfoKHR *pImportFenceFdInfo);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetFenceFdKHR(
+    VkDevice device,
+    const VkFenceGetFdInfoKHR *pGetFdInfo,
+    int *pFd);
+#endif
+
+#define VK_KHR_get_surface_capabilities2 1
+#define VK_KHR_GET_SURFACE_CAPABILITIES_2_SPEC_VERSION 1
+#define VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME "VK_KHR_get_surface_capabilities2"
+
+typedef struct VkPhysicalDeviceSurfaceInfo2KHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkSurfaceKHR surface;
+} VkPhysicalDeviceSurfaceInfo2KHR;
+
+typedef struct VkSurfaceCapabilities2KHR {
+    VkStructureType sType;
+    void *pNext;
+    VkSurfaceCapabilitiesKHR surfaceCapabilities;
+} VkSurfaceCapabilities2KHR;
+
+typedef struct VkSurfaceFormat2KHR {
+    VkStructureType sType;
+    void *pNext;
+    VkSurfaceFormatKHR surfaceFormat;
+} VkSurfaceFormat2KHR;
+
+typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo, VkSurfaceCapabilities2KHR *pSurfaceCapabilities);
+typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceFormats2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo, uint32_t *pSurfaceFormatCount, VkSurfaceFormat2KHR *pSurfaceFormats);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilities2KHR(
+    VkPhysicalDevice physicalDevice,
+    const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo,
+    VkSurfaceCapabilities2KHR *pSurfaceCapabilities);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormats2KHR(
+    VkPhysicalDevice physicalDevice,
+    const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo,
+    uint32_t *pSurfaceFormatCount,
+    VkSurfaceFormat2KHR *pSurfaceFormats);
+#endif
+
+#define VK_KHR_variable_pointers 1
+#define VK_KHR_VARIABLE_POINTERS_SPEC_VERSION 1
+#define VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME "VK_KHR_variable_pointers"
+
+typedef struct VkPhysicalDeviceVariablePointerFeaturesKHR {
+    VkStructureType sType;
+    void *pNext;
+    VkBool32 variablePointersStorageBuffer;
+    VkBool32 variablePointers;
+} VkPhysicalDeviceVariablePointerFeaturesKHR;
+
+#define VK_KHR_dedicated_allocation 1
+#define VK_KHR_DEDICATED_ALLOCATION_SPEC_VERSION 3
+#define VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME "VK_KHR_dedicated_allocation"
+
+typedef struct VkMemoryDedicatedRequirementsKHR {
+    VkStructureType sType;
+    void *pNext;
+    VkBool32 prefersDedicatedAllocation;
+    VkBool32 requiresDedicatedAllocation;
+} VkMemoryDedicatedRequirementsKHR;
+
+typedef struct VkMemoryDedicatedAllocateInfoKHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkImage image;
+    VkBuffer buffer;
+} VkMemoryDedicatedAllocateInfoKHR;
+
+#define VK_KHR_storage_buffer_storage_class 1
+#define VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_SPEC_VERSION 1
+#define VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME "VK_KHR_storage_buffer_storage_class"
+
+#define VK_KHR_relaxed_block_layout 1
+#define VK_KHR_RELAXED_BLOCK_LAYOUT_SPEC_VERSION 1
+#define VK_KHR_RELAXED_BLOCK_LAYOUT_EXTENSION_NAME "VK_KHR_relaxed_block_layout"
+
+#define VK_KHR_get_memory_requirements2 1
+#define VK_KHR_GET_MEMORY_REQUIREMENTS_2_SPEC_VERSION 1
+#define VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME "VK_KHR_get_memory_requirements2"
+
+typedef struct VkBufferMemoryRequirementsInfo2KHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkBuffer buffer;
+} VkBufferMemoryRequirementsInfo2KHR;
+
+typedef struct VkImageMemoryRequirementsInfo2KHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkImage image;
+} VkImageMemoryRequirementsInfo2KHR;
+
+typedef struct VkImageSparseMemoryRequirementsInfo2KHR {
+    VkStructureType sType;
+    const void *pNext;
+    VkImage image;
+} VkImageSparseMemoryRequirementsInfo2KHR;
+
+typedef struct VkMemoryRequirements2KHR {
+    VkStructureType sType;
+    void *pNext;
+    VkMemoryRequirements memoryRequirements;
+} VkMemoryRequirements2KHR;
+
+typedef struct VkSparseImageMemoryRequirements2KHR {
+    VkStructureType sType;
+    void *pNext;
+    VkSparseImageMemoryRequirements memoryRequirements;
+} VkSparseImageMemoryRequirements2KHR;
+
+typedef void(VKAPI_PTR *PFN_vkGetImageMemoryRequirements2KHR)(VkDevice device, const VkImageMemoryRequirementsInfo2KHR *pInfo, VkMemoryRequirements2KHR *pMemoryRequirements);
+typedef void(VKAPI_PTR *PFN_vkGetBufferMemoryRequirements2KHR)(VkDevice device, const VkBufferMemoryRequirementsInfo2KHR *pInfo, VkMemoryRequirements2KHR *pMemoryRequirements);
+typedef void(VKAPI_PTR *PFN_vkGetImageSparseMemoryRequirements2KHR)(VkDevice device, const VkImageSparseMemoryRequirementsInfo2KHR *pInfo, uint32_t *pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements2KHR *pSparseMemoryRequirements);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements2KHR(
+    VkDevice device,
+    const VkImageMemoryRequirementsInfo2KHR *pInfo,
+    VkMemoryRequirements2KHR *pMemoryRequirements);
+
+VKAPI_ATTR void VKAPI_CALL vkGetBufferMemoryRequirements2KHR(
+    VkDevice device,
+    const VkBufferMemoryRequirementsInfo2KHR *pInfo,
+    VkMemoryRequirements2KHR *pMemoryRequirements);
+
+VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements2KHR(
+    VkDevice device,
+    const VkImageSparseMemoryRequirementsInfo2KHR *pInfo,
+    uint32_t *pSparseMemoryRequirementCount,
+    VkSparseImageMemoryRequirements2KHR *pSparseMemoryRequirements);
+#endif
+
+#define VK_EXT_debug_report 1
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT)
+
+#define VK_EXT_DEBUG_REPORT_SPEC_VERSION 8
+#define VK_EXT_DEBUG_REPORT_EXTENSION_NAME "VK_EXT_debug_report"
+#define VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT
+#define VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT
+
+typedef enum VkDebugReportObjectTypeEXT {
+    VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT = 0,
+    VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT = 1,
+    VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT = 2,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT = 3,
+    VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT = 4,
+    VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT = 5,
+    VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT = 6,
+    VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT = 7,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT = 8,
+    VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT = 9,
+    VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT = 10,
+    VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT = 11,
+    VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT = 12,
+    VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT = 13,
+    VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT = 14,
+    VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT = 15,
+    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT = 16,
+    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT = 17,
+    VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT = 18,
+    VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT = 19,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT = 20,
+    VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT = 21,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT = 22,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT = 23,
+    VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT = 24,
+    VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT = 25,
+    VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT = 26,
+    VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT = 27,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT = 28,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT = 29,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT = 30,
+    VK_DEBUG_REPORT_OBJECT_TYPE_OBJECT_TABLE_NVX_EXT = 31,
+    VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT = 32,
+    VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = 33,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = 1000085000,
+    VK_DEBUG_REPORT_OBJECT_TYPE_BEGIN_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,
+    VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT,
+    VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = (VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT - VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1),  // NOLINT: misc-redundant-expression
+    VK_DEBUG_REPORT_OBJECT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkDebugReportObjectTypeEXT;
+
+typedef enum VkDebugReportFlagBitsEXT {
+    VK_DEBUG_REPORT_INFORMATION_BIT_EXT = 0x00000001,
+    VK_DEBUG_REPORT_WARNING_BIT_EXT = 0x00000002,
+    VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT = 0x00000004,
+    VK_DEBUG_REPORT_ERROR_BIT_EXT = 0x00000008,
+    VK_DEBUG_REPORT_DEBUG_BIT_EXT = 0x00000010,
+    VK_DEBUG_REPORT_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkDebugReportFlagBitsEXT;
+typedef VkFlags VkDebugReportFlagsEXT;
+
+typedef VkBool32(VKAPI_PTR *PFN_vkDebugReportCallbackEXT)(
+    VkDebugReportFlagsEXT flags,
+    VkDebugReportObjectTypeEXT objectType,
+    uint64_t object,
+    size_t location,
+    int32_t messageCode,
+    const char *pLayerPrefix,
+    const char *pMessage,
+    void *pUserData);
+
+typedef struct VkDebugReportCallbackCreateInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkDebugReportFlagsEXT flags;
+    PFN_vkDebugReportCallbackEXT pfnCallback;
+    void *pUserData;
+} VkDebugReportCallbackCreateInfoEXT;
+
+typedef VkResult(VKAPI_PTR *PFN_vkCreateDebugReportCallbackEXT)(VkInstance instance, const VkDebugReportCallbackCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDebugReportCallbackEXT *pCallback);
+typedef void(VKAPI_PTR *PFN_vkDestroyDebugReportCallbackEXT)(VkInstance instance, VkDebugReportCallbackEXT callback, const VkAllocationCallbacks *pAllocator);
+typedef void(VKAPI_PTR *PFN_vkDebugReportMessageEXT)(VkInstance instance, VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, uint64_t object, size_t location, int32_t messageCode, const char *pLayerPrefix, const char *pMessage);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateDebugReportCallbackEXT(
+    VkInstance instance,
+    const VkDebugReportCallbackCreateInfoEXT *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkDebugReportCallbackEXT *pCallback);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroyDebugReportCallbackEXT(
+    VkInstance instance,
+    VkDebugReportCallbackEXT callback,
+    const VkAllocationCallbacks *pAllocator);
+
+VKAPI_ATTR void VKAPI_CALL vkDebugReportMessageEXT(
+    VkInstance instance,
+    VkDebugReportFlagsEXT flags,
+    VkDebugReportObjectTypeEXT objectType,
+    uint64_t object,
+    size_t location,
+    int32_t messageCode,
+    const char *pLayerPrefix,
+    const char *pMessage);
+#endif
+
+#define VK_NV_glsl_shader 1
+#define VK_NV_GLSL_SHADER_SPEC_VERSION 1
+#define VK_NV_GLSL_SHADER_EXTENSION_NAME "VK_NV_glsl_shader"
+
+#define VK_EXT_depth_range_unrestricted 1
+#define VK_EXT_DEPTH_RANGE_UNRESTRICTED_SPEC_VERSION 1
+#define VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME "VK_EXT_depth_range_unrestricted"
+
+#define VK_IMG_filter_cubic 1
+#define VK_IMG_FILTER_CUBIC_SPEC_VERSION 1
+#define VK_IMG_FILTER_CUBIC_EXTENSION_NAME "VK_IMG_filter_cubic"
+
+#define VK_AMD_rasterization_order 1
+#define VK_AMD_RASTERIZATION_ORDER_SPEC_VERSION 1
+#define VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME "VK_AMD_rasterization_order"
+
+typedef enum VkRasterizationOrderAMD {
+    VK_RASTERIZATION_ORDER_STRICT_AMD = 0,
+    VK_RASTERIZATION_ORDER_RELAXED_AMD = 1,
+    VK_RASTERIZATION_ORDER_BEGIN_RANGE_AMD = VK_RASTERIZATION_ORDER_STRICT_AMD,
+    VK_RASTERIZATION_ORDER_END_RANGE_AMD = VK_RASTERIZATION_ORDER_RELAXED_AMD,
+    VK_RASTERIZATION_ORDER_RANGE_SIZE_AMD = (VK_RASTERIZATION_ORDER_RELAXED_AMD - VK_RASTERIZATION_ORDER_STRICT_AMD + 1),  // NOLINT: misc-redundant-expression
+    VK_RASTERIZATION_ORDER_MAX_ENUM_AMD = 0x7FFFFFFF
+} VkRasterizationOrderAMD;
+
+typedef struct VkPipelineRasterizationStateRasterizationOrderAMD {
+    VkStructureType sType;
+    const void *pNext;
+    VkRasterizationOrderAMD rasterizationOrder;
+} VkPipelineRasterizationStateRasterizationOrderAMD;
+
+#define VK_AMD_shader_trinary_minmax 1
+#define VK_AMD_SHADER_TRINARY_MINMAX_SPEC_VERSION 1
+#define VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME "VK_AMD_shader_trinary_minmax"
+
+#define VK_AMD_shader_explicit_vertex_parameter 1
+#define VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_SPEC_VERSION 1
+#define VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_EXTENSION_NAME "VK_AMD_shader_explicit_vertex_parameter"
+
+#define VK_EXT_debug_marker 1
+#define VK_EXT_DEBUG_MARKER_SPEC_VERSION 4
+#define VK_EXT_DEBUG_MARKER_EXTENSION_NAME "VK_EXT_debug_marker"
+
+typedef struct VkDebugMarkerObjectNameInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkDebugReportObjectTypeEXT objectType;
+    uint64_t object;
+    const char *pObjectName;
+} VkDebugMarkerObjectNameInfoEXT;
+
+typedef struct VkDebugMarkerObjectTagInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkDebugReportObjectTypeEXT objectType;
+    uint64_t object;
+    uint64_t tagName;
+    size_t tagSize;
+    const void *pTag;
+} VkDebugMarkerObjectTagInfoEXT;
+
+typedef struct VkDebugMarkerMarkerInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    const char *pMarkerName;
+    float color[4];
+} VkDebugMarkerMarkerInfoEXT;
+
+typedef VkResult(VKAPI_PTR *PFN_vkDebugMarkerSetObjectTagEXT)(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo);
+typedef VkResult(VKAPI_PTR *PFN_vkDebugMarkerSetObjectNameEXT)(VkDevice device, const VkDebugMarkerObjectNameInfoEXT *pNameInfo);
+typedef void(VKAPI_PTR *PFN_vkCmdDebugMarkerBeginEXT)(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo);
+typedef void(VKAPI_PTR *PFN_vkCmdDebugMarkerEndEXT)(VkCommandBuffer commandBuffer);
+typedef void(VKAPI_PTR *PFN_vkCmdDebugMarkerInsertEXT)(VkCommandBuffer commandBuffer, const VkDebugMarkerMarkerInfoEXT *pMarkerInfo);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkDebugMarkerSetObjectTagEXT(
+    VkDevice device,
+    const VkDebugMarkerObjectTagInfoEXT *pTagInfo);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkDebugMarkerSetObjectNameEXT(
+    VkDevice device,
+    const VkDebugMarkerObjectNameInfoEXT *pNameInfo);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerBeginEXT(
+    VkCommandBuffer commandBuffer,
+    const VkDebugMarkerMarkerInfoEXT *pMarkerInfo);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerEndEXT(
+    VkCommandBuffer commandBuffer);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerInsertEXT(
+    VkCommandBuffer commandBuffer,
+    const VkDebugMarkerMarkerInfoEXT *pMarkerInfo);
+#endif
+
+#define VK_AMD_gcn_shader 1
+#define VK_AMD_GCN_SHADER_SPEC_VERSION 1
+#define VK_AMD_GCN_SHADER_EXTENSION_NAME "VK_AMD_gcn_shader"
+
+#define VK_NV_dedicated_allocation 1
+#define VK_NV_DEDICATED_ALLOCATION_SPEC_VERSION 1
+#define VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME "VK_NV_dedicated_allocation"
+
+typedef struct VkDedicatedAllocationImageCreateInfoNV {
+    VkStructureType sType;
+    const void *pNext;
+    VkBool32 dedicatedAllocation;
+} VkDedicatedAllocationImageCreateInfoNV;
+
+typedef struct VkDedicatedAllocationBufferCreateInfoNV {
+    VkStructureType sType;
+    const void *pNext;
+    VkBool32 dedicatedAllocation;
+} VkDedicatedAllocationBufferCreateInfoNV;
+
+typedef struct VkDedicatedAllocationMemoryAllocateInfoNV {
+    VkStructureType sType;
+    const void *pNext;
+    VkImage image;
+    VkBuffer buffer;
+} VkDedicatedAllocationMemoryAllocateInfoNV;
+
+#define VK_AMD_draw_indirect_count 1
+#define VK_AMD_DRAW_INDIRECT_COUNT_SPEC_VERSION 1
+#define VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME "VK_AMD_draw_indirect_count"
+
+typedef void(VKAPI_PTR *PFN_vkCmdDrawIndirectCountAMD)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride);
+typedef void(VKAPI_PTR *PFN_vkCmdDrawIndexedIndirectCountAMD)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirectCountAMD(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirectCountAMD(
+    VkCommandBuffer commandBuffer,
+    VkBuffer buffer,
+    VkDeviceSize offset,
+    VkBuffer countBuffer,
+    VkDeviceSize countBufferOffset,
+    uint32_t maxDrawCount,
+    uint32_t stride);
+#endif
+
+#define VK_AMD_negative_viewport_height 1
+#define VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_SPEC_VERSION 1
+#define VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_EXTENSION_NAME "VK_AMD_negative_viewport_height"
+
+#define VK_AMD_gpu_shader_half_float 1
+#define VK_AMD_GPU_SHADER_HALF_FLOAT_SPEC_VERSION 1
+#define VK_AMD_GPU_SHADER_HALF_FLOAT_EXTENSION_NAME "VK_AMD_gpu_shader_half_float"
+
+#define VK_AMD_shader_ballot 1
+#define VK_AMD_SHADER_BALLOT_SPEC_VERSION 1
+#define VK_AMD_SHADER_BALLOT_EXTENSION_NAME "VK_AMD_shader_ballot"
+
+#define VK_AMD_texture_gather_bias_lod 1
+#define VK_AMD_TEXTURE_GATHER_BIAS_LOD_SPEC_VERSION 1
+#define VK_AMD_TEXTURE_GATHER_BIAS_LOD_EXTENSION_NAME "VK_AMD_texture_gather_bias_lod"
+
+typedef struct VkTextureLODGatherFormatPropertiesAMD {
+    VkStructureType sType;
+    void *pNext;
+    VkBool32 supportsTextureGatherLODBiasAMD;
+} VkTextureLODGatherFormatPropertiesAMD;
+
+#define VK_KHX_multiview 1
+#define VK_KHX_MULTIVIEW_SPEC_VERSION 1
+#define VK_KHX_MULTIVIEW_EXTENSION_NAME "VK_KHX_multiview"
+
+typedef struct VkRenderPassMultiviewCreateInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t subpassCount;
+    const uint32_t *pViewMasks;
+    uint32_t dependencyCount;
+    const int32_t *pViewOffsets;
+    uint32_t correlationMaskCount;
+    const uint32_t *pCorrelationMasks;
+} VkRenderPassMultiviewCreateInfoKHX;
+
+typedef struct VkPhysicalDeviceMultiviewFeaturesKHX {
+    VkStructureType sType;
+    void *pNext;
+    VkBool32 multiview;
+    VkBool32 multiviewGeometryShader;
+    VkBool32 multiviewTessellationShader;
+} VkPhysicalDeviceMultiviewFeaturesKHX;
+
+typedef struct VkPhysicalDeviceMultiviewPropertiesKHX {
+    VkStructureType sType;
+    void *pNext;
+    uint32_t maxMultiviewViewCount;
+    uint32_t maxMultiviewInstanceIndex;
+} VkPhysicalDeviceMultiviewPropertiesKHX;
+
+#define VK_IMG_format_pvrtc 1
+#define VK_IMG_FORMAT_PVRTC_SPEC_VERSION 1
+#define VK_IMG_FORMAT_PVRTC_EXTENSION_NAME "VK_IMG_format_pvrtc"
+
+#define VK_NV_external_memory_capabilities 1
+#define VK_NV_EXTERNAL_MEMORY_CAPABILITIES_SPEC_VERSION 1
+#define VK_NV_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME "VK_NV_external_memory_capabilities"
+
+typedef enum VkExternalMemoryHandleTypeFlagBitsNV {
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_NV = 0x00000001,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_NV = 0x00000002,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_IMAGE_BIT_NV = 0x00000004,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_IMAGE_KMT_BIT_NV = 0x00000008,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF
+} VkExternalMemoryHandleTypeFlagBitsNV;
+typedef VkFlags VkExternalMemoryHandleTypeFlagsNV;
+
+typedef enum VkExternalMemoryFeatureFlagBitsNV {
+    VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_NV = 0x00000001,
+    VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_NV = 0x00000002,
+    VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_NV = 0x00000004,
+    VK_EXTERNAL_MEMORY_FEATURE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF
+} VkExternalMemoryFeatureFlagBitsNV;
+typedef VkFlags VkExternalMemoryFeatureFlagsNV;
+
+typedef struct VkExternalImageFormatPropertiesNV {
+    VkImageFormatProperties imageFormatProperties;
+    VkExternalMemoryFeatureFlagsNV externalMemoryFeatures;
+    VkExternalMemoryHandleTypeFlagsNV exportFromImportedHandleTypes;
+    VkExternalMemoryHandleTypeFlagsNV compatibleHandleTypes;
+} VkExternalImageFormatPropertiesNV;
+
+typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalImageFormatPropertiesNV)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkExternalMemoryHandleTypeFlagsNV externalHandleType, VkExternalImageFormatPropertiesNV *pExternalImageFormatProperties);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceExternalImageFormatPropertiesNV(
+    VkPhysicalDevice physicalDevice,
+    VkFormat format,
+    VkImageType type,
+    VkImageTiling tiling,
+    VkImageUsageFlags usage,
+    VkImageCreateFlags flags,
+    VkExternalMemoryHandleTypeFlagsNV externalHandleType,
+    VkExternalImageFormatPropertiesNV *pExternalImageFormatProperties);
+#endif
+
+#define VK_NV_external_memory 1
+#define VK_NV_EXTERNAL_MEMORY_SPEC_VERSION 1
+#define VK_NV_EXTERNAL_MEMORY_EXTENSION_NAME "VK_NV_external_memory"
+
+typedef struct VkExternalMemoryImageCreateInfoNV {
+    VkStructureType sType;
+    const void *pNext;
+    VkExternalMemoryHandleTypeFlagsNV handleTypes;
+} VkExternalMemoryImageCreateInfoNV;
+
+typedef struct VkExportMemoryAllocateInfoNV {
+    VkStructureType sType;
+    const void *pNext;
+    VkExternalMemoryHandleTypeFlagsNV handleTypes;
+} VkExportMemoryAllocateInfoNV;
+
+#define VK_KHX_device_group 1
+#define VK_KHX_DEVICE_GROUP_SPEC_VERSION 1
+#define VK_KHX_DEVICE_GROUP_EXTENSION_NAME "VK_KHX_device_group"
+#define VK_MAX_DEVICE_GROUP_SIZE_KHX 32
+
+typedef enum VkPeerMemoryFeatureFlagBitsKHX {
+    VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT_KHX = 0x00000001,
+    VK_PEER_MEMORY_FEATURE_COPY_DST_BIT_KHX = 0x00000002,
+    VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT_KHX = 0x00000004,
+    VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT_KHX = 0x00000008,
+    VK_PEER_MEMORY_FEATURE_FLAG_BITS_MAX_ENUM_KHX = 0x7FFFFFFF
+} VkPeerMemoryFeatureFlagBitsKHX;
+typedef VkFlags VkPeerMemoryFeatureFlagsKHX;
+
+typedef enum VkMemoryAllocateFlagBitsKHX {
+    VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT_KHX = 0x00000001,
+    VK_MEMORY_ALLOCATE_FLAG_BITS_MAX_ENUM_KHX = 0x7FFFFFFF
+} VkMemoryAllocateFlagBitsKHX;
+typedef VkFlags VkMemoryAllocateFlagsKHX;
+
+typedef enum VkDeviceGroupPresentModeFlagBitsKHX {
+    VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHX = 0x00000001,
+    VK_DEVICE_GROUP_PRESENT_MODE_REMOTE_BIT_KHX = 0x00000002,
+    VK_DEVICE_GROUP_PRESENT_MODE_SUM_BIT_KHX = 0x00000004,
+    VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_MULTI_DEVICE_BIT_KHX = 0x00000008,
+    VK_DEVICE_GROUP_PRESENT_MODE_FLAG_BITS_MAX_ENUM_KHX = 0x7FFFFFFF
+} VkDeviceGroupPresentModeFlagBitsKHX;
+typedef VkFlags VkDeviceGroupPresentModeFlagsKHX;
+
+typedef struct VkMemoryAllocateFlagsInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    VkMemoryAllocateFlagsKHX flags;
+    uint32_t deviceMask;
+} VkMemoryAllocateFlagsInfoKHX;
+
+typedef struct VkBindBufferMemoryInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    VkBuffer buffer;
+    VkDeviceMemory memory;
+    VkDeviceSize memoryOffset;
+    uint32_t deviceIndexCount;
+    const uint32_t *pDeviceIndices;
+} VkBindBufferMemoryInfoKHX;
+
+typedef struct VkBindImageMemoryInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    VkImage image;
+    VkDeviceMemory memory;
+    VkDeviceSize memoryOffset;
+    uint32_t deviceIndexCount;
+    const uint32_t *pDeviceIndices;
+    uint32_t SFRRectCount;
+    const VkRect2D *pSFRRects;
+} VkBindImageMemoryInfoKHX;
+
+typedef struct VkDeviceGroupRenderPassBeginInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t deviceMask;
+    uint32_t deviceRenderAreaCount;
+    const VkRect2D *pDeviceRenderAreas;
+} VkDeviceGroupRenderPassBeginInfoKHX;
+
+typedef struct VkDeviceGroupCommandBufferBeginInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t deviceMask;
+} VkDeviceGroupCommandBufferBeginInfoKHX;
+
+typedef struct VkDeviceGroupSubmitInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t waitSemaphoreCount;
+    const uint32_t *pWaitSemaphoreDeviceIndices;
+    uint32_t commandBufferCount;
+    const uint32_t *pCommandBufferDeviceMasks;
+    uint32_t signalSemaphoreCount;
+    const uint32_t *pSignalSemaphoreDeviceIndices;
+} VkDeviceGroupSubmitInfoKHX;
+
+typedef struct VkDeviceGroupBindSparseInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t resourceDeviceIndex;
+    uint32_t memoryDeviceIndex;
+} VkDeviceGroupBindSparseInfoKHX;
+
+typedef struct VkDeviceGroupPresentCapabilitiesKHX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t presentMask[VK_MAX_DEVICE_GROUP_SIZE_KHX];
+    VkDeviceGroupPresentModeFlagsKHX modes;
+} VkDeviceGroupPresentCapabilitiesKHX;
+
+typedef struct VkImageSwapchainCreateInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    VkSwapchainKHR swapchain;
+} VkImageSwapchainCreateInfoKHX;
+
+typedef struct VkBindImageMemorySwapchainInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    VkSwapchainKHR swapchain;
+    uint32_t imageIndex;
+} VkBindImageMemorySwapchainInfoKHX;
+
+typedef struct VkAcquireNextImageInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    VkSwapchainKHR swapchain;
+    uint64_t timeout;
+    VkSemaphore semaphore;
+    VkFence fence;
+    uint32_t deviceMask;
+} VkAcquireNextImageInfoKHX;
+
+typedef struct VkDeviceGroupPresentInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t swapchainCount;
+    const uint32_t *pDeviceMasks;
+    VkDeviceGroupPresentModeFlagBitsKHX mode;
+} VkDeviceGroupPresentInfoKHX;
+
+typedef struct VkDeviceGroupSwapchainCreateInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    VkDeviceGroupPresentModeFlagsKHX modes;
+} VkDeviceGroupSwapchainCreateInfoKHX;
+
+typedef void(VKAPI_PTR *PFN_vkGetDeviceGroupPeerMemoryFeaturesKHX)(VkDevice device, uint32_t heapIndex, uint32_t localDeviceIndex, uint32_t remoteDeviceIndex, VkPeerMemoryFeatureFlagsKHX *pPeerMemoryFeatures);
+typedef VkResult(VKAPI_PTR *PFN_vkBindBufferMemory2KHX)(VkDevice device, uint32_t bindInfoCount, const VkBindBufferMemoryInfoKHX *pBindInfos);
+typedef VkResult(VKAPI_PTR *PFN_vkBindImageMemory2KHX)(VkDevice device, uint32_t bindInfoCount, const VkBindImageMemoryInfoKHX *pBindInfos);
+typedef void(VKAPI_PTR *PFN_vkCmdSetDeviceMaskKHX)(VkCommandBuffer commandBuffer, uint32_t deviceMask);
+typedef void(VKAPI_PTR *PFN_vkCmdDispatchBaseKHX)(VkCommandBuffer commandBuffer, uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ, uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
+typedef VkResult(VKAPI_PTR *PFN_vkGetDeviceGroupPresentCapabilitiesKHX)(VkDevice device, VkDeviceGroupPresentCapabilitiesKHX *pDeviceGroupPresentCapabilities);
+typedef VkResult(VKAPI_PTR *PFN_vkGetDeviceGroupSurfacePresentModesKHX)(VkDevice device, VkSurfaceKHR surface, VkDeviceGroupPresentModeFlagsKHX *pModes);
+typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDevicePresentRectanglesKHX)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, uint32_t *pRectCount, VkRect2D *pRects);
+typedef VkResult(VKAPI_PTR *PFN_vkAcquireNextImage2KHX)(VkDevice device, const VkAcquireNextImageInfoKHX *pAcquireInfo, uint32_t *pImageIndex);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkGetDeviceGroupPeerMemoryFeaturesKHX(
+    VkDevice device,
+    uint32_t heapIndex,
+    uint32_t localDeviceIndex,
+    uint32_t remoteDeviceIndex,
+    VkPeerMemoryFeatureFlagsKHX *pPeerMemoryFeatures);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkBindBufferMemory2KHX(
+    VkDevice device,
+    uint32_t bindInfoCount,
+    const VkBindBufferMemoryInfoKHX *pBindInfos);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory2KHX(
+    VkDevice device,
+    uint32_t bindInfoCount,
+    const VkBindImageMemoryInfoKHX *pBindInfos);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdSetDeviceMaskKHX(
+    VkCommandBuffer commandBuffer,
+    uint32_t deviceMask);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDispatchBaseKHX(
+    VkCommandBuffer commandBuffer,
+    uint32_t baseGroupX,
+    uint32_t baseGroupY,
+    uint32_t baseGroupZ,
+    uint32_t groupCountX,
+    uint32_t groupCountY,
+    uint32_t groupCountZ);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceGroupPresentCapabilitiesKHX(
+    VkDevice device,
+    VkDeviceGroupPresentCapabilitiesKHX *pDeviceGroupPresentCapabilities);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceGroupSurfacePresentModesKHX(
+    VkDevice device,
+    VkSurfaceKHR surface,
+    VkDeviceGroupPresentModeFlagsKHX *pModes);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDevicePresentRectanglesKHX(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    uint32_t *pRectCount,
+    VkRect2D *pRects);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHX(
+    VkDevice device,
+    const VkAcquireNextImageInfoKHX *pAcquireInfo,
+    uint32_t *pImageIndex);
+#endif
+
+#define VK_EXT_validation_flags 1
+#define VK_EXT_VALIDATION_FLAGS_SPEC_VERSION 1
+#define VK_EXT_VALIDATION_FLAGS_EXTENSION_NAME "VK_EXT_validation_flags"
+
+typedef enum VkValidationCheckEXT {
+    VK_VALIDATION_CHECK_ALL_EXT = 0,
+    VK_VALIDATION_CHECK_SHADERS_EXT = 1,
+    VK_VALIDATION_CHECK_BEGIN_RANGE_EXT = VK_VALIDATION_CHECK_ALL_EXT,
+    VK_VALIDATION_CHECK_END_RANGE_EXT = VK_VALIDATION_CHECK_SHADERS_EXT,
+    VK_VALIDATION_CHECK_RANGE_SIZE_EXT = (VK_VALIDATION_CHECK_SHADERS_EXT - VK_VALIDATION_CHECK_ALL_EXT + 1),  // NOLINT: misc-redundant-expression
+    VK_VALIDATION_CHECK_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkValidationCheckEXT;
+
+typedef struct VkValidationFlagsEXT {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t disabledValidationCheckCount;
+    VkValidationCheckEXT *pDisabledValidationChecks;
+} VkValidationFlagsEXT;
+
+#ifdef VK_USE_PLATFORM_VI_NN
+#define VK_NN_vi_surface 1
+#define VK_NN_VI_SURFACE_SPEC_VERSION 1
+#define VK_NN_VI_SURFACE_EXTENSION_NAME "VK_NN_vi_surface"
+
+typedef VkFlags VkViSurfaceCreateFlagsNN;
+
+typedef struct VkViSurfaceCreateInfoNN {
+    VkStructureType sType;
+    const void *pNext;
+    VkViSurfaceCreateFlagsNN flags;
+    void *window;
+} VkViSurfaceCreateInfoNN;
+
+typedef VkResult(VKAPI_PTR *PFN_vkCreateViSurfaceNN)(VkInstance instance, const VkViSurfaceCreateInfoNN *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateViSurfaceNN(
+    VkInstance instance,
+    const VkViSurfaceCreateInfoNN *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkSurfaceKHR *pSurface);
+#endif
+#endif /* VK_USE_PLATFORM_VI_NN */
+
+#define VK_EXT_shader_subgroup_ballot 1
+#define VK_EXT_SHADER_SUBGROUP_BALLOT_SPEC_VERSION 1
+#define VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME "VK_EXT_shader_subgroup_ballot"
+
+#define VK_EXT_shader_subgroup_vote 1
+#define VK_EXT_SHADER_SUBGROUP_VOTE_SPEC_VERSION 1
+#define VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME "VK_EXT_shader_subgroup_vote"
+
+#define VK_KHX_device_group_creation 1
+#define VK_KHX_DEVICE_GROUP_CREATION_SPEC_VERSION 1
+#define VK_KHX_DEVICE_GROUP_CREATION_EXTENSION_NAME "VK_KHX_device_group_creation"
+
+typedef struct VkPhysicalDeviceGroupPropertiesKHX {
+    VkStructureType sType;
+    void *pNext;
+    uint32_t physicalDeviceCount;
+    VkPhysicalDevice physicalDevices[VK_MAX_DEVICE_GROUP_SIZE_KHX];
+    VkBool32 subsetAllocation;
+} VkPhysicalDeviceGroupPropertiesKHX;
+
+typedef struct VkDeviceGroupDeviceCreateInfoKHX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t physicalDeviceCount;
+    const VkPhysicalDevice *pPhysicalDevices;
+} VkDeviceGroupDeviceCreateInfoKHX;
+
+typedef VkResult(VKAPI_PTR *PFN_vkEnumeratePhysicalDeviceGroupsKHX)(VkInstance instance, uint32_t *pPhysicalDeviceGroupCount, VkPhysicalDeviceGroupPropertiesKHX *pPhysicalDeviceGroupProperties);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDeviceGroupsKHX(
+    VkInstance instance,
+    uint32_t *pPhysicalDeviceGroupCount,
+    VkPhysicalDeviceGroupPropertiesKHX *pPhysicalDeviceGroupProperties);
+#endif
+
+#define VK_NVX_device_generated_commands 1
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkObjectTableNVX)
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNVX)
+
+#define VK_NVX_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 3
+#define VK_NVX_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME "VK_NVX_device_generated_commands"
+
+typedef enum VkIndirectCommandsTokenTypeNVX {
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NVX = 0,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_DESCRIPTOR_SET_NVX = 1,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NVX = 2,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NVX = 3,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NVX = 4,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NVX = 5,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NVX = 6,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NVX = 7,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_BEGIN_RANGE_NVX = VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NVX,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_END_RANGE_NVX = VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NVX,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_RANGE_SIZE_NVX = (VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NVX - VK_INDIRECT_COMMANDS_TOKEN_TYPE_PIPELINE_NVX + 1),  // NOLINT: misc-redundant-expression
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF
+} VkIndirectCommandsTokenTypeNVX;
+
+typedef enum VkObjectEntryTypeNVX {
+    VK_OBJECT_ENTRY_TYPE_DESCRIPTOR_SET_NVX = 0,
+    VK_OBJECT_ENTRY_TYPE_PIPELINE_NVX = 1,
+    VK_OBJECT_ENTRY_TYPE_INDEX_BUFFER_NVX = 2,
+    VK_OBJECT_ENTRY_TYPE_VERTEX_BUFFER_NVX = 3,
+    VK_OBJECT_ENTRY_TYPE_PUSH_CONSTANT_NVX = 4,
+    VK_OBJECT_ENTRY_TYPE_BEGIN_RANGE_NVX = VK_OBJECT_ENTRY_TYPE_DESCRIPTOR_SET_NVX,
+    VK_OBJECT_ENTRY_TYPE_END_RANGE_NVX = VK_OBJECT_ENTRY_TYPE_PUSH_CONSTANT_NVX,
+    VK_OBJECT_ENTRY_TYPE_RANGE_SIZE_NVX = (VK_OBJECT_ENTRY_TYPE_PUSH_CONSTANT_NVX - VK_OBJECT_ENTRY_TYPE_DESCRIPTOR_SET_NVX + 1),  // NOLINT: misc-redundant-expression
+    VK_OBJECT_ENTRY_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF
+} VkObjectEntryTypeNVX;
+
+typedef enum VkIndirectCommandsLayoutUsageFlagBitsNVX {
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_NVX = 0x00000001,
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_SPARSE_SEQUENCES_BIT_NVX = 0x00000002,
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EMPTY_EXECUTIONS_BIT_NVX = 0x00000004,
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_INDEXED_SEQUENCES_BIT_NVX = 0x00000008,
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF
+} VkIndirectCommandsLayoutUsageFlagBitsNVX;
+typedef VkFlags VkIndirectCommandsLayoutUsageFlagsNVX;
+
+typedef enum VkObjectEntryUsageFlagBitsNVX {
+    VK_OBJECT_ENTRY_USAGE_GRAPHICS_BIT_NVX = 0x00000001,
+    VK_OBJECT_ENTRY_USAGE_COMPUTE_BIT_NVX = 0x00000002,
+    VK_OBJECT_ENTRY_USAGE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF
+} VkObjectEntryUsageFlagBitsNVX;
+typedef VkFlags VkObjectEntryUsageFlagsNVX;
+
+typedef struct VkDeviceGeneratedCommandsFeaturesNVX {
+    VkStructureType sType;
+    const void *pNext;
+    VkBool32 computeBindingPointSupport;
+} VkDeviceGeneratedCommandsFeaturesNVX;
+
+typedef struct VkDeviceGeneratedCommandsLimitsNVX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t maxIndirectCommandsLayoutTokenCount;
+    uint32_t maxObjectEntryCounts;
+    uint32_t minSequenceCountBufferOffsetAlignment;
+    uint32_t minSequenceIndexBufferOffsetAlignment;
+    uint32_t minCommandsTokenBufferOffsetAlignment;
+} VkDeviceGeneratedCommandsLimitsNVX;
+
+typedef struct VkIndirectCommandsTokenNVX {
+    VkIndirectCommandsTokenTypeNVX tokenType;
+    VkBuffer buffer;
+    VkDeviceSize offset;
+} VkIndirectCommandsTokenNVX;
+
+typedef struct VkIndirectCommandsLayoutTokenNVX {
+    VkIndirectCommandsTokenTypeNVX tokenType;
+    uint32_t bindingUnit;
+    uint32_t dynamicCount;
+    uint32_t divisor;
+} VkIndirectCommandsLayoutTokenNVX;
+
+typedef struct VkIndirectCommandsLayoutCreateInfoNVX {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineBindPoint pipelineBindPoint;
+    VkIndirectCommandsLayoutUsageFlagsNVX flags;
+    uint32_t tokenCount;
+    const VkIndirectCommandsLayoutTokenNVX *pTokens;
+} VkIndirectCommandsLayoutCreateInfoNVX;
+
+typedef struct VkCmdProcessCommandsInfoNVX {
+    VkStructureType sType;
+    const void *pNext;
+    VkObjectTableNVX objectTable;
+    VkIndirectCommandsLayoutNVX indirectCommandsLayout;
+    uint32_t indirectCommandsTokenCount;
+    const VkIndirectCommandsTokenNVX *pIndirectCommandsTokens;
+    uint32_t maxSequencesCount;
+    VkCommandBuffer targetCommandBuffer;
+    VkBuffer sequencesCountBuffer;
+    VkDeviceSize sequencesCountOffset;
+    VkBuffer sequencesIndexBuffer;
+    VkDeviceSize sequencesIndexOffset;
+} VkCmdProcessCommandsInfoNVX;
+
+typedef struct VkCmdReserveSpaceForCommandsInfoNVX {
+    VkStructureType sType;
+    const void *pNext;
+    VkObjectTableNVX objectTable;
+    VkIndirectCommandsLayoutNVX indirectCommandsLayout;
+    uint32_t maxSequencesCount;
+} VkCmdReserveSpaceForCommandsInfoNVX;
+
+typedef struct VkObjectTableCreateInfoNVX {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t objectCount;
+    const VkObjectEntryTypeNVX *pObjectEntryTypes;
+    const uint32_t *pObjectEntryCounts;
+    const VkObjectEntryUsageFlagsNVX *pObjectEntryUsageFlags;
+    uint32_t maxUniformBuffersPerDescriptor;
+    uint32_t maxStorageBuffersPerDescriptor;
+    uint32_t maxStorageImagesPerDescriptor;
+    uint32_t maxSampledImagesPerDescriptor;
+    uint32_t maxPipelineLayouts;
+} VkObjectTableCreateInfoNVX;
+
+typedef struct VkObjectTableEntryNVX {
+    VkObjectEntryTypeNVX type;
+    VkObjectEntryUsageFlagsNVX flags;
+} VkObjectTableEntryNVX;
+
+typedef struct VkObjectTablePipelineEntryNVX {
+    VkObjectEntryTypeNVX type;
+    VkObjectEntryUsageFlagsNVX flags;
+    VkPipeline pipeline;
+} VkObjectTablePipelineEntryNVX;
+
+typedef struct VkObjectTableDescriptorSetEntryNVX {
+    VkObjectEntryTypeNVX type;
+    VkObjectEntryUsageFlagsNVX flags;
+    VkPipelineLayout pipelineLayout;
+    VkDescriptorSet descriptorSet;
+} VkObjectTableDescriptorSetEntryNVX;
+
+typedef struct VkObjectTableVertexBufferEntryNVX {
+    VkObjectEntryTypeNVX type;
+    VkObjectEntryUsageFlagsNVX flags;
+    VkBuffer buffer;
+} VkObjectTableVertexBufferEntryNVX;
+
+typedef struct VkObjectTableIndexBufferEntryNVX {
+    VkObjectEntryTypeNVX type;
+    VkObjectEntryUsageFlagsNVX flags;
+    VkBuffer buffer;
+    VkIndexType indexType;
+} VkObjectTableIndexBufferEntryNVX;
+
+typedef struct VkObjectTablePushConstantEntryNVX {
+    VkObjectEntryTypeNVX type;
+    VkObjectEntryUsageFlagsNVX flags;
+    VkPipelineLayout pipelineLayout;
+    VkShaderStageFlags stageFlags;
+} VkObjectTablePushConstantEntryNVX;
+
+typedef void(VKAPI_PTR *PFN_vkCmdProcessCommandsNVX)(VkCommandBuffer commandBuffer, const VkCmdProcessCommandsInfoNVX *pProcessCommandsInfo);
+typedef void(VKAPI_PTR *PFN_vkCmdReserveSpaceForCommandsNVX)(VkCommandBuffer commandBuffer, const VkCmdReserveSpaceForCommandsInfoNVX *pReserveSpaceInfo);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateIndirectCommandsLayoutNVX)(VkDevice device, const VkIndirectCommandsLayoutCreateInfoNVX *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkIndirectCommandsLayoutNVX *pIndirectCommandsLayout);
+typedef void(VKAPI_PTR *PFN_vkDestroyIndirectCommandsLayoutNVX)(VkDevice device, VkIndirectCommandsLayoutNVX indirectCommandsLayout, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkCreateObjectTableNVX)(VkDevice device, const VkObjectTableCreateInfoNVX *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkObjectTableNVX *pObjectTable);
+typedef void(VKAPI_PTR *PFN_vkDestroyObjectTableNVX)(VkDevice device, VkObjectTableNVX objectTable, const VkAllocationCallbacks *pAllocator);
+typedef VkResult(VKAPI_PTR *PFN_vkRegisterObjectsNVX)(VkDevice device, VkObjectTableNVX objectTable, uint32_t objectCount, const VkObjectTableEntryNVX *const *ppObjectTableEntries, const uint32_t *pObjectIndices);
+typedef VkResult(VKAPI_PTR *PFN_vkUnregisterObjectsNVX)(VkDevice device, VkObjectTableNVX objectTable, uint32_t objectCount, const VkObjectEntryTypeNVX *pObjectEntryTypes, const uint32_t *pObjectIndices);
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX)(VkPhysicalDevice physicalDevice, VkDeviceGeneratedCommandsFeaturesNVX *pFeatures, VkDeviceGeneratedCommandsLimitsNVX *pLimits);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdProcessCommandsNVX(
+    VkCommandBuffer commandBuffer,
+    const VkCmdProcessCommandsInfoNVX *pProcessCommandsInfo);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdReserveSpaceForCommandsNVX(
+    VkCommandBuffer commandBuffer,
+    const VkCmdReserveSpaceForCommandsInfoNVX *pReserveSpaceInfo);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectCommandsLayoutNVX(
+    VkDevice device,
+    const VkIndirectCommandsLayoutCreateInfoNVX *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkIndirectCommandsLayoutNVX *pIndirectCommandsLayout);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutNVX(
+    VkDevice device,
+    VkIndirectCommandsLayoutNVX indirectCommandsLayout,
+    const VkAllocationCallbacks *pAllocator);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateObjectTableNVX(
+    VkDevice device,
+    const VkObjectTableCreateInfoNVX *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkObjectTableNVX *pObjectTable);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroyObjectTableNVX(
+    VkDevice device,
+    VkObjectTableNVX objectTable,
+    const VkAllocationCallbacks *pAllocator);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkRegisterObjectsNVX(
+    VkDevice device,
+    VkObjectTableNVX objectTable,
+    uint32_t objectCount,
+    const VkObjectTableEntryNVX *const *ppObjectTableEntries,
+    const uint32_t *pObjectIndices);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkUnregisterObjectsNVX(
+    VkDevice device,
+    VkObjectTableNVX objectTable,
+    uint32_t objectCount,
+    const VkObjectEntryTypeNVX *pObjectEntryTypes,
+    const uint32_t *pObjectIndices);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX(
+    VkPhysicalDevice physicalDevice,
+    VkDeviceGeneratedCommandsFeaturesNVX *pFeatures,
+    VkDeviceGeneratedCommandsLimitsNVX *pLimits);
+#endif
+
+#define VK_NV_clip_space_w_scaling 1
+#define VK_NV_CLIP_SPACE_W_SCALING_SPEC_VERSION 1
+#define VK_NV_CLIP_SPACE_W_SCALING_EXTENSION_NAME "VK_NV_clip_space_w_scaling"
+
+typedef struct VkViewportWScalingNV {
+    float xcoeff;
+    float ycoeff;
+} VkViewportWScalingNV;
+
+typedef struct VkPipelineViewportWScalingStateCreateInfoNV {
+    VkStructureType sType;
+    const void *pNext;
+    VkBool32 viewportWScalingEnable;
+    uint32_t viewportCount;
+    const VkViewportWScalingNV *pViewportWScalings;
+} VkPipelineViewportWScalingStateCreateInfoNV;
+
+typedef void(VKAPI_PTR *PFN_vkCmdSetViewportWScalingNV)(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount, const VkViewportWScalingNV *pViewportWScalings);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdSetViewportWScalingNV(
+    VkCommandBuffer commandBuffer,
+    uint32_t firstViewport,
+    uint32_t viewportCount,
+    const VkViewportWScalingNV *pViewportWScalings);
+#endif
+
+#define VK_EXT_direct_mode_display 1
+#define VK_EXT_DIRECT_MODE_DISPLAY_SPEC_VERSION 1
+#define VK_EXT_DIRECT_MODE_DISPLAY_EXTENSION_NAME "VK_EXT_direct_mode_display"
+
+typedef VkResult(VKAPI_PTR *PFN_vkReleaseDisplayEXT)(VkPhysicalDevice physicalDevice, VkDisplayKHR display);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkReleaseDisplayEXT(
+    VkPhysicalDevice physicalDevice,
+    VkDisplayKHR display);
+#endif
+
+#define VK_EXT_display_surface_counter 1
+#define VK_EXT_DISPLAY_SURFACE_COUNTER_SPEC_VERSION 1
+#define VK_EXT_DISPLAY_SURFACE_COUNTER_EXTENSION_NAME "VK_EXT_display_surface_counter"
+#define VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT
+
+typedef enum VkSurfaceCounterFlagBitsEXT {
+    VK_SURFACE_COUNTER_VBLANK_EXT = 0x00000001,
+    VK_SURFACE_COUNTER_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkSurfaceCounterFlagBitsEXT;
+typedef VkFlags VkSurfaceCounterFlagsEXT;
+
+typedef struct VkSurfaceCapabilities2EXT {
+    VkStructureType sType;
+    void *pNext;
+    uint32_t minImageCount;
+    uint32_t maxImageCount;
+    VkExtent2D currentExtent;
+    VkExtent2D minImageExtent;
+    VkExtent2D maxImageExtent;
+    uint32_t maxImageArrayLayers;
+    VkSurfaceTransformFlagsKHR supportedTransforms;
+    VkSurfaceTransformFlagBitsKHR currentTransform;
+    VkCompositeAlphaFlagsKHR supportedCompositeAlpha;
+    VkImageUsageFlags supportedUsageFlags;
+    VkSurfaceCounterFlagsEXT supportedSurfaceCounters;
+} VkSurfaceCapabilities2EXT;
+
+typedef VkResult(VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilities2EXT)(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface, VkSurfaceCapabilities2EXT *pSurfaceCapabilities);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilities2EXT(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    VkSurfaceCapabilities2EXT *pSurfaceCapabilities);
+#endif
+
+#define VK_EXT_display_control 1
+#define VK_EXT_DISPLAY_CONTROL_SPEC_VERSION 1
+#define VK_EXT_DISPLAY_CONTROL_EXTENSION_NAME "VK_EXT_display_control"
+
+typedef enum VkDisplayPowerStateEXT {
+    VK_DISPLAY_POWER_STATE_OFF_EXT = 0,
+    VK_DISPLAY_POWER_STATE_SUSPEND_EXT = 1,
+    VK_DISPLAY_POWER_STATE_ON_EXT = 2,
+    VK_DISPLAY_POWER_STATE_BEGIN_RANGE_EXT = VK_DISPLAY_POWER_STATE_OFF_EXT,
+    VK_DISPLAY_POWER_STATE_END_RANGE_EXT = VK_DISPLAY_POWER_STATE_ON_EXT,
+    VK_DISPLAY_POWER_STATE_RANGE_SIZE_EXT = (VK_DISPLAY_POWER_STATE_ON_EXT - VK_DISPLAY_POWER_STATE_OFF_EXT + 1),  // NOLINT: misc-redundant-expression
+    VK_DISPLAY_POWER_STATE_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkDisplayPowerStateEXT;
+
+typedef enum VkDeviceEventTypeEXT {
+    VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT = 0,
+    VK_DEVICE_EVENT_TYPE_BEGIN_RANGE_EXT = VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT,
+    VK_DEVICE_EVENT_TYPE_END_RANGE_EXT = VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT,
+    VK_DEVICE_EVENT_TYPE_RANGE_SIZE_EXT = (VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT - VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT + 1),  // NOLINT: misc-redundant-expression
+    VK_DEVICE_EVENT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkDeviceEventTypeEXT;
+
+typedef enum VkDisplayEventTypeEXT {
+    VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT = 0,
+    VK_DISPLAY_EVENT_TYPE_BEGIN_RANGE_EXT = VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT,
+    VK_DISPLAY_EVENT_TYPE_END_RANGE_EXT = VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT,
+    VK_DISPLAY_EVENT_TYPE_RANGE_SIZE_EXT = (VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT - VK_DISPLAY_EVENT_TYPE_FIRST_PIXEL_OUT_EXT + 1),  // NOLINT: misc-redundant-expression
+    VK_DISPLAY_EVENT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkDisplayEventTypeEXT;
+
+typedef struct VkDisplayPowerInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkDisplayPowerStateEXT powerState;
+} VkDisplayPowerInfoEXT;
+
+typedef struct VkDeviceEventInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkDeviceEventTypeEXT deviceEvent;
+} VkDeviceEventInfoEXT;
+
+typedef struct VkDisplayEventInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkDisplayEventTypeEXT displayEvent;
+} VkDisplayEventInfoEXT;
+
+typedef struct VkSwapchainCounterCreateInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkSurfaceCounterFlagsEXT surfaceCounters;
+} VkSwapchainCounterCreateInfoEXT;
+
+typedef VkResult(VKAPI_PTR *PFN_vkDisplayPowerControlEXT)(VkDevice device, VkDisplayKHR display, const VkDisplayPowerInfoEXT *pDisplayPowerInfo);
+typedef VkResult(VKAPI_PTR *PFN_vkRegisterDeviceEventEXT)(VkDevice device, const VkDeviceEventInfoEXT *pDeviceEventInfo, const VkAllocationCallbacks *pAllocator, VkFence *pFence);
+typedef VkResult(VKAPI_PTR *PFN_vkRegisterDisplayEventEXT)(VkDevice device, VkDisplayKHR display, const VkDisplayEventInfoEXT *pDisplayEventInfo, const VkAllocationCallbacks *pAllocator, VkFence *pFence);
+typedef VkResult(VKAPI_PTR *PFN_vkGetSwapchainCounterEXT)(VkDevice device, VkSwapchainKHR swapchain, VkSurfaceCounterFlagBitsEXT counter, uint64_t *pCounterValue);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkDisplayPowerControlEXT(
+    VkDevice device,
+    VkDisplayKHR display,
+    const VkDisplayPowerInfoEXT *pDisplayPowerInfo);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkRegisterDeviceEventEXT(
+    VkDevice device,
+    const VkDeviceEventInfoEXT *pDeviceEventInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkFence *pFence);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkRegisterDisplayEventEXT(
+    VkDevice device,
+    VkDisplayKHR display,
+    const VkDisplayEventInfoEXT *pDisplayEventInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkFence *pFence);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainCounterEXT(
+    VkDevice device,
+    VkSwapchainKHR swapchain,
+    VkSurfaceCounterFlagBitsEXT counter,
+    uint64_t *pCounterValue);
+#endif
+
+#define VK_GOOGLE_display_timing 1
+#define VK_GOOGLE_DISPLAY_TIMING_SPEC_VERSION 1
+#define VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME "VK_GOOGLE_display_timing"
+
+typedef struct VkRefreshCycleDurationGOOGLE {
+    uint64_t refreshDuration;
+} VkRefreshCycleDurationGOOGLE;
+
+typedef struct VkPastPresentationTimingGOOGLE {
+    uint32_t presentID;
+    uint64_t desiredPresentTime;
+    uint64_t actualPresentTime;
+    uint64_t earliestPresentTime;
+    uint64_t presentMargin;
+} VkPastPresentationTimingGOOGLE;
+
+typedef struct VkPresentTimeGOOGLE {
+    uint32_t presentID;
+    uint64_t desiredPresentTime;
+} VkPresentTimeGOOGLE;
+
+typedef struct VkPresentTimesInfoGOOGLE {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t swapchainCount;
+    const VkPresentTimeGOOGLE *pTimes;
+} VkPresentTimesInfoGOOGLE;
+
+typedef VkResult(VKAPI_PTR *PFN_vkGetRefreshCycleDurationGOOGLE)(VkDevice device, VkSwapchainKHR swapchain, VkRefreshCycleDurationGOOGLE *pDisplayTimingProperties);
+typedef VkResult(VKAPI_PTR *PFN_vkGetPastPresentationTimingGOOGLE)(VkDevice device, VkSwapchainKHR swapchain, uint32_t *pPresentationTimingCount, VkPastPresentationTimingGOOGLE *pPresentationTimings);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetRefreshCycleDurationGOOGLE(
+    VkDevice device,
+    VkSwapchainKHR swapchain,
+    VkRefreshCycleDurationGOOGLE *pDisplayTimingProperties);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPastPresentationTimingGOOGLE(
+    VkDevice device,
+    VkSwapchainKHR swapchain,
+    uint32_t *pPresentationTimingCount,
+    VkPastPresentationTimingGOOGLE *pPresentationTimings);
+#endif
+
+#define VK_NV_sample_mask_override_coverage 1
+#define VK_NV_SAMPLE_MASK_OVERRIDE_COVERAGE_SPEC_VERSION 1
+#define VK_NV_SAMPLE_MASK_OVERRIDE_COVERAGE_EXTENSION_NAME "VK_NV_sample_mask_override_coverage"
+
+#define VK_NV_geometry_shader_passthrough 1
+#define VK_NV_GEOMETRY_SHADER_PASSTHROUGH_SPEC_VERSION 1
+#define VK_NV_GEOMETRY_SHADER_PASSTHROUGH_EXTENSION_NAME "VK_NV_geometry_shader_passthrough"
+
+#define VK_NV_viewport_array2 1
+#define VK_NV_VIEWPORT_ARRAY2_SPEC_VERSION 1
+#define VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME "VK_NV_viewport_array2"
+
+#define VK_NVX_multiview_per_view_attributes 1
+#define VK_NVX_MULTIVIEW_PER_VIEW_ATTRIBUTES_SPEC_VERSION 1
+#define VK_NVX_MULTIVIEW_PER_VIEW_ATTRIBUTES_EXTENSION_NAME "VK_NVX_multiview_per_view_attributes"
+
+typedef struct VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX {
+    VkStructureType sType;
+    void *pNext;
+    VkBool32 perViewPositionAllComponents;
+} VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX;
+
+#define VK_NV_viewport_swizzle 1
+#define VK_NV_VIEWPORT_SWIZZLE_SPEC_VERSION 1
+#define VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME "VK_NV_viewport_swizzle"
+
+typedef enum VkViewportCoordinateSwizzleNV {
+    VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV = 0,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_X_NV = 1,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Y_NV = 2,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Y_NV = 3,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Z_NV = 4,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Z_NV = 5,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_W_NV = 6,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV = 7,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_BEGIN_RANGE_NV = VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_END_RANGE_NV = VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV,
+    VK_VIEWPORT_COORDINATE_SWIZZLE_RANGE_SIZE_NV = (VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV - VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV + 1),  // NOLINT: misc-redundant-expression
+    VK_VIEWPORT_COORDINATE_SWIZZLE_MAX_ENUM_NV = 0x7FFFFFFF
+} VkViewportCoordinateSwizzleNV;
+
+typedef VkFlags VkPipelineViewportSwizzleStateCreateFlagsNV;
+
+typedef struct VkViewportSwizzleNV {
+    VkViewportCoordinateSwizzleNV x;
+    VkViewportCoordinateSwizzleNV y;
+    VkViewportCoordinateSwizzleNV z;
+    VkViewportCoordinateSwizzleNV w;
+} VkViewportSwizzleNV;
+
+typedef struct VkPipelineViewportSwizzleStateCreateInfoNV {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineViewportSwizzleStateCreateFlagsNV flags;
+    uint32_t viewportCount;
+    const VkViewportSwizzleNV *pViewportSwizzles;
+} VkPipelineViewportSwizzleStateCreateInfoNV;
+
+#define VK_EXT_discard_rectangles 1
+#define VK_EXT_DISCARD_RECTANGLES_SPEC_VERSION 1
+#define VK_EXT_DISCARD_RECTANGLES_EXTENSION_NAME "VK_EXT_discard_rectangles"
+
+typedef enum VkDiscardRectangleModeEXT {
+    VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT = 0,
+    VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT = 1,
+    VK_DISCARD_RECTANGLE_MODE_BEGIN_RANGE_EXT = VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT,
+    VK_DISCARD_RECTANGLE_MODE_END_RANGE_EXT = VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT,
+    VK_DISCARD_RECTANGLE_MODE_RANGE_SIZE_EXT = (VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT - VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT + 1),  // NOLINT: misc-redundant-expression
+    VK_DISCARD_RECTANGLE_MODE_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkDiscardRectangleModeEXT;
+
+typedef VkFlags VkPipelineDiscardRectangleStateCreateFlagsEXT;
+
+typedef struct VkPhysicalDeviceDiscardRectanglePropertiesEXT {
+    VkStructureType sType;
+    void *pNext;
+    uint32_t maxDiscardRectangles;
+} VkPhysicalDeviceDiscardRectanglePropertiesEXT;
+
+typedef struct VkPipelineDiscardRectangleStateCreateInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkPipelineDiscardRectangleStateCreateFlagsEXT flags;
+    VkDiscardRectangleModeEXT discardRectangleMode;
+    uint32_t discardRectangleCount;
+    const VkRect2D *pDiscardRectangles;
+} VkPipelineDiscardRectangleStateCreateInfoEXT;
+
+typedef void(VKAPI_PTR *PFN_vkCmdSetDiscardRectangleEXT)(VkCommandBuffer commandBuffer, uint32_t firstDiscardRectangle, uint32_t discardRectangleCount, const VkRect2D *pDiscardRectangles);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdSetDiscardRectangleEXT(
+    VkCommandBuffer commandBuffer,
+    uint32_t firstDiscardRectangle,
+    uint32_t discardRectangleCount,
+    const VkRect2D *pDiscardRectangles);
+#endif
+
+#define VK_EXT_swapchain_colorspace 1
+#define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 3
+#define VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME "VK_EXT_swapchain_colorspace"
+
+#define VK_EXT_hdr_metadata 1
+#define VK_EXT_HDR_METADATA_SPEC_VERSION 1
+#define VK_EXT_HDR_METADATA_EXTENSION_NAME "VK_EXT_hdr_metadata"
+
+typedef struct VkXYColorEXT {
+    float x;
+    float y;
+} VkXYColorEXT;
+
+typedef struct VkHdrMetadataEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkXYColorEXT displayPrimaryRed;
+    VkXYColorEXT displayPrimaryGreen;
+    VkXYColorEXT displayPrimaryBlue;
+    VkXYColorEXT whitePoint;
+    float maxLuminance;
+    float minLuminance;
+    float maxContentLightLevel;
+    float maxFrameAverageLightLevel;
+} VkHdrMetadataEXT;
+
+typedef void(VKAPI_PTR *PFN_vkSetHdrMetadataEXT)(VkDevice device, uint32_t swapchainCount, const VkSwapchainKHR *pSwapchains, const VkHdrMetadataEXT *pMetadata);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkSetHdrMetadataEXT(
+    VkDevice device,
+    uint32_t swapchainCount,
+    const VkSwapchainKHR *pSwapchains,
+    const VkHdrMetadataEXT *pMetadata);
+#endif
+
+#ifdef VK_USE_PLATFORM_IOS_MVK
+#define VK_MVK_ios_surface 1
+#define VK_MVK_IOS_SURFACE_SPEC_VERSION 2
+#define VK_MVK_IOS_SURFACE_EXTENSION_NAME "VK_MVK_ios_surface"
+
+typedef VkFlags VkIOSSurfaceCreateFlagsMVK;
+
+typedef struct VkIOSSurfaceCreateInfoMVK {
+    VkStructureType sType;
+    const void *pNext;
+    VkIOSSurfaceCreateFlagsMVK flags;
+    const void *pView;
+} VkIOSSurfaceCreateInfoMVK;
+
+typedef VkResult(VKAPI_PTR *PFN_vkCreateIOSSurfaceMVK)(VkInstance instance, const VkIOSSurfaceCreateInfoMVK *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateIOSSurfaceMVK(
+    VkInstance instance,
+    const VkIOSSurfaceCreateInfoMVK *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkSurfaceKHR *pSurface);
+#endif
+#endif /* VK_USE_PLATFORM_IOS_MVK */
+
+#ifdef VK_USE_PLATFORM_MACOS_MVK
+#define VK_MVK_macos_surface 1
+#define VK_MVK_MACOS_SURFACE_SPEC_VERSION 2
+#define VK_MVK_MACOS_SURFACE_EXTENSION_NAME "VK_MVK_macos_surface"
+
+typedef VkFlags VkMacOSSurfaceCreateFlagsMVK;
+
+typedef struct VkMacOSSurfaceCreateInfoMVK {
+    VkStructureType sType;
+    const void *pNext;
+    VkMacOSSurfaceCreateFlagsMVK flags;
+    const void *pView;
+} VkMacOSSurfaceCreateInfoMVK;
+
+typedef VkResult(VKAPI_PTR *PFN_vkCreateMacOSSurfaceMVK)(VkInstance instance, const VkMacOSSurfaceCreateInfoMVK *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateMacOSSurfaceMVK(
+    VkInstance instance,
+    const VkMacOSSurfaceCreateInfoMVK *pCreateInfo,
+    const VkAllocationCallbacks *pAllocator,
+    VkSurfaceKHR *pSurface);
+#endif
+#endif /* VK_USE_PLATFORM_MACOS_MVK */
+
+#define VK_EXT_sampler_filter_minmax 1
+#define VK_EXT_SAMPLER_FILTER_MINMAX_SPEC_VERSION 1
+#define VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME "VK_EXT_sampler_filter_minmax"
+
+typedef enum VkSamplerReductionModeEXT {
+    VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT = 0,
+    VK_SAMPLER_REDUCTION_MODE_MIN_EXT = 1,
+    VK_SAMPLER_REDUCTION_MODE_MAX_EXT = 2,
+    VK_SAMPLER_REDUCTION_MODE_BEGIN_RANGE_EXT = VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT,
+    VK_SAMPLER_REDUCTION_MODE_END_RANGE_EXT = VK_SAMPLER_REDUCTION_MODE_MAX_EXT,
+    VK_SAMPLER_REDUCTION_MODE_RANGE_SIZE_EXT = (VK_SAMPLER_REDUCTION_MODE_MAX_EXT - VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT + 1),  // NOLINT: misc-redundant-expression
+    VK_SAMPLER_REDUCTION_MODE_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkSamplerReductionModeEXT;
+
+typedef struct VkSamplerReductionModeCreateInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkSamplerReductionModeEXT reductionMode;
+} VkSamplerReductionModeCreateInfoEXT;
+
+typedef struct VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT {
+    VkStructureType sType;
+    void *pNext;
+    VkBool32 filterMinmaxSingleComponentFormats;
+    VkBool32 filterMinmaxImageComponentMapping;
+} VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT;
+
+#define VK_AMD_gpu_shader_int16 1
+#define VK_AMD_GPU_SHADER_INT16_SPEC_VERSION 1
+#define VK_AMD_GPU_SHADER_INT16_EXTENSION_NAME "VK_AMD_gpu_shader_int16"
+
+#define VK_AMD_mixed_attachment_samples 1
+#define VK_AMD_MIXED_ATTACHMENT_SAMPLES_SPEC_VERSION 1
+#define VK_AMD_MIXED_ATTACHMENT_SAMPLES_EXTENSION_NAME "VK_AMD_mixed_attachment_samples"
+
+#define VK_AMD_shader_fragment_mask 1
+#define VK_AMD_SHADER_FRAGMENT_MASK_SPEC_VERSION 1
+#define VK_AMD_SHADER_FRAGMENT_MASK_EXTENSION_NAME "VK_AMD_shader_fragment_mask"
+
+#define VK_EXT_shader_stencil_export 1
+#define VK_EXT_SHADER_STENCIL_EXPORT_SPEC_VERSION 1
+#define VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME "VK_EXT_shader_stencil_export"
+
+#define VK_EXT_sample_locations 1
+#define VK_EXT_SAMPLE_LOCATIONS_SPEC_VERSION 1
+#define VK_EXT_SAMPLE_LOCATIONS_EXTENSION_NAME "VK_EXT_sample_locations"
+
+typedef struct VkSampleLocationEXT {
+    float x;
+    float y;
+} VkSampleLocationEXT;
+
+typedef struct VkSampleLocationsInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkSampleCountFlagBits sampleLocationsPerPixel;
+    VkExtent2D sampleLocationGridSize;
+    uint32_t sampleLocationsCount;
+    const VkSampleLocationEXT *pSampleLocations;
+} VkSampleLocationsInfoEXT;
+
+typedef struct VkAttachmentSampleLocationsEXT {
+    uint32_t attachmentIndex;
+    VkSampleLocationsInfoEXT sampleLocationsInfo;
+} VkAttachmentSampleLocationsEXT;
+
+typedef struct VkSubpassSampleLocationsEXT {
+    uint32_t subpassIndex;
+    VkSampleLocationsInfoEXT sampleLocationsInfo;
+} VkSubpassSampleLocationsEXT;
+
+typedef struct VkRenderPassSampleLocationsBeginInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    uint32_t attachmentInitialSampleLocationsCount;
+    const VkAttachmentSampleLocationsEXT *pAttachmentInitialSampleLocations;
+    uint32_t postSubpassSampleLocationsCount;
+    const VkSubpassSampleLocationsEXT *pSubpassSampleLocations;
+} VkRenderPassSampleLocationsBeginInfoEXT;
+
+typedef struct VkPipelineSampleLocationsStateCreateInfoEXT {
+    VkStructureType sType;
+    const void *pNext;
+    VkBool32 sampleLocationsEnable;
+    VkSampleLocationsInfoEXT sampleLocationsInfo;
+} VkPipelineSampleLocationsStateCreateInfoEXT;
+
+typedef struct VkPhysicalDeviceSampleLocationsPropertiesEXT {
+    VkStructureType sType;
+    void *pNext;
+    VkSampleCountFlags sampleLocationSampleCounts;
+    VkExtent2D maxSampleLocationGridSize;
+    float sampleLocationCoordinateRange[2];
+    uint32_t sampleLocationSubPixelBits;
+    VkBool32 variableSampleLocations;
+} VkPhysicalDeviceSampleLocationsPropertiesEXT;
+
+typedef struct VkMultisamplePropertiesEXT {
+    VkStructureType sType;
+    void *pNext;
+    VkExtent2D maxSampleLocationGridSize;
+} VkMultisamplePropertiesEXT;
+
+typedef void(VKAPI_PTR *PFN_vkCmdSetSampleLocationsEXT)(VkCommandBuffer commandBuffer, const VkSampleLocationsInfoEXT *pSampleLocationsInfo);
+typedef void(VKAPI_PTR *PFN_vkGetPhysicalDeviceMultisamplePropertiesEXT)(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples, VkMultisamplePropertiesEXT *pMultisampleProperties);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdSetSampleLocationsEXT(
+    VkCommandBuffer commandBuffer,
+    const VkSampleLocationsInfoEXT *pSampleLocationsInfo);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceMultisamplePropertiesEXT(
+    VkPhysicalDevice physicalDevice,
+    VkSampleCountFlagBits samples,
+    VkMultisamplePropertiesEXT *pMultisampleProperties);
+#endif
+
+#define VK_EXT_blend_operation_advanced 1
+#define VK_EXT_BLEND_OPERATION_ADVANCED_SPEC_VERSION 2
+#define VK_EXT_BLEND_OPERATION_ADVANCED_EXTENSION_NAME "VK_EXT_blend_operation_advanced"
+
+typedef enum VkBlendOverlapEXT {
+    VK_BLEND_OVERLAP_UNCORRELATED_EXT = 0,
+    VK_BLEND_OVERLAP_DISJOINT_EXT = 1,
+    VK_BLEND_OVERLAP_CONJOINT_EXT = 2,
+    VK_BLEND_OVERLAP_BEGIN_RANGE_EXT = VK_BLEND_OVERLAP_UNCORRELATED_EXT,
+    VK_BLEND_OVERLAP_END_RANGE_EXT = VK_BLEND_OVERLAP_CONJOINT_EXT,
+    VK_BLEND_OVERLAP_RANGE_SIZE_EXT = (VK_BLEND_OVERLAP_CONJOINT_EXT - VK_BLEND_OVERLAP_UNCORRELATED_EXT + 1),  // NOLINT: misc-redundant-expression
+    VK_BLEND_OVERLAP_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkBlendOverlapEXT;
+
+typedef struct VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT {
+    VkStructureType sType;
+    void *pNext;
+    VkBool32 advancedBlendCoherentOperations;
+} VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT;
+
+typedef struct VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT {
+    VkStructureType sType;
+    void *pNext;
+
uint32_t advancedBlendMaxColorAttachments; + VkBool32 advancedBlendIndependentBlend; + VkBool32 advancedBlendNonPremultipliedSrcColor; + VkBool32 advancedBlendNonPremultipliedDstColor; + VkBool32 advancedBlendCorrelatedOverlap; + VkBool32 advancedBlendAllOperations; +} VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT; + +typedef struct VkPipelineColorBlendAdvancedStateCreateInfoEXT { + VkStructureType sType; + const void *pNext; + VkBool32 srcPremultiplied; + VkBool32 dstPremultiplied; + VkBlendOverlapEXT blendOverlap; +} VkPipelineColorBlendAdvancedStateCreateInfoEXT; + +#define VK_NV_fragment_coverage_to_color 1 +#define VK_NV_FRAGMENT_COVERAGE_TO_COLOR_SPEC_VERSION 1 +#define VK_NV_FRAGMENT_COVERAGE_TO_COLOR_EXTENSION_NAME "VK_NV_fragment_coverage_to_color" + +typedef VkFlags VkPipelineCoverageToColorStateCreateFlagsNV; + +typedef struct VkPipelineCoverageToColorStateCreateInfoNV { + VkStructureType sType; + const void *pNext; + VkPipelineCoverageToColorStateCreateFlagsNV flags; + VkBool32 coverageToColorEnable; + uint32_t coverageToColorLocation; +} VkPipelineCoverageToColorStateCreateInfoNV; + +#define VK_NV_framebuffer_mixed_samples 1 +#define VK_NV_FRAMEBUFFER_MIXED_SAMPLES_SPEC_VERSION 1 +#define VK_NV_FRAMEBUFFER_MIXED_SAMPLES_EXTENSION_NAME "VK_NV_framebuffer_mixed_samples" + +typedef enum VkCoverageModulationModeNV { + VK_COVERAGE_MODULATION_MODE_NONE_NV = 0, + VK_COVERAGE_MODULATION_MODE_RGB_NV = 1, + VK_COVERAGE_MODULATION_MODE_ALPHA_NV = 2, + VK_COVERAGE_MODULATION_MODE_RGBA_NV = 3, + VK_COVERAGE_MODULATION_MODE_BEGIN_RANGE_NV = VK_COVERAGE_MODULATION_MODE_NONE_NV, + VK_COVERAGE_MODULATION_MODE_END_RANGE_NV = VK_COVERAGE_MODULATION_MODE_RGBA_NV, + VK_COVERAGE_MODULATION_MODE_RANGE_SIZE_NV = (VK_COVERAGE_MODULATION_MODE_RGBA_NV - VK_COVERAGE_MODULATION_MODE_NONE_NV + 1), // NOLINT: misc-redundant-expression + VK_COVERAGE_MODULATION_MODE_MAX_ENUM_NV = 0x7FFFFFFF +} VkCoverageModulationModeNV; + +typedef VkFlags VkPipelineCoverageModulationStateCreateFlagsNV; + +typedef struct VkPipelineCoverageModulationStateCreateInfoNV { + VkStructureType sType; + const void *pNext; + VkPipelineCoverageModulationStateCreateFlagsNV flags; + VkCoverageModulationModeNV coverageModulationMode; + VkBool32 coverageModulationTableEnable; + uint32_t coverageModulationTableCount; + const float *pCoverageModulationTable; +} VkPipelineCoverageModulationStateCreateInfoNV; + +#define VK_NV_fill_rectangle 1 +#define VK_NV_FILL_RECTANGLE_SPEC_VERSION 1 +#define VK_NV_FILL_RECTANGLE_EXTENSION_NAME "VK_NV_fill_rectangle" + +#define VK_EXT_post_depth_coverage 1 +#define VK_EXT_POST_DEPTH_COVERAGE_SPEC_VERSION 1 +#define VK_EXT_POST_DEPTH_COVERAGE_EXTENSION_NAME "VK_EXT_post_depth_coverage" + +#define VK_EXT_validation_cache 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkValidationCacheEXT) + +#define VK_EXT_VALIDATION_CACHE_SPEC_VERSION 1 +#define VK_EXT_VALIDATION_CACHE_EXTENSION_NAME "VK_EXT_validation_cache" + +typedef enum VkValidationCacheHeaderVersionEXT { + VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT = 1, + VK_VALIDATION_CACHE_HEADER_VERSION_BEGIN_RANGE_EXT = VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT, + VK_VALIDATION_CACHE_HEADER_VERSION_END_RANGE_EXT = VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT, + VK_VALIDATION_CACHE_HEADER_VERSION_RANGE_SIZE_EXT = (VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT - VK_VALIDATION_CACHE_HEADER_VERSION_ONE_EXT + 1), // NOLINT: misc-redundant-expression + VK_VALIDATION_CACHE_HEADER_VERSION_MAX_ENUM_EXT = 0x7FFFFFFF +} VkValidationCacheHeaderVersionEXT; + +typedef VkFlags 
VkValidationCacheCreateFlagsEXT; + +typedef struct VkValidationCacheCreateInfoEXT { + VkStructureType sType; + const void *pNext; + VkValidationCacheCreateFlagsEXT flags; + size_t initialDataSize; + const void *pInitialData; +} VkValidationCacheCreateInfoEXT; + +typedef struct VkShaderModuleValidationCacheCreateInfoEXT { + VkStructureType sType; + const void *pNext; + VkValidationCacheEXT validationCache; +} VkShaderModuleValidationCacheCreateInfoEXT; + +typedef VkResult(VKAPI_PTR *PFN_vkCreateValidationCacheEXT)(VkDevice device, const VkValidationCacheCreateInfoEXT *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkValidationCacheEXT *pValidationCache); +typedef void(VKAPI_PTR *PFN_vkDestroyValidationCacheEXT)(VkDevice device, VkValidationCacheEXT validationCache, const VkAllocationCallbacks *pAllocator); +typedef VkResult(VKAPI_PTR *PFN_vkMergeValidationCachesEXT)(VkDevice device, VkValidationCacheEXT dstCache, uint32_t srcCacheCount, const VkValidationCacheEXT *pSrcCaches); +typedef VkResult(VKAPI_PTR *PFN_vkGetValidationCacheDataEXT)(VkDevice device, VkValidationCacheEXT validationCache, size_t *pDataSize, void *pData); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreateValidationCacheEXT( + VkDevice device, + const VkValidationCacheCreateInfoEXT *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkValidationCacheEXT *pValidationCache); + +VKAPI_ATTR void VKAPI_CALL vkDestroyValidationCacheEXT( + VkDevice device, + VkValidationCacheEXT validationCache, + const VkAllocationCallbacks *pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkMergeValidationCachesEXT( + VkDevice device, + VkValidationCacheEXT dstCache, + uint32_t srcCacheCount, + const VkValidationCacheEXT *pSrcCaches); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetValidationCacheDataEXT( + VkDevice device, + VkValidationCacheEXT validationCache, + size_t *pDataSize, + void *pData); +#endif + +#define VK_EXT_shader_viewport_index_layer 1 +#define VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_SPEC_VERSION 1 +#define VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME "VK_EXT_shader_viewport_index_layer" + +// Provided by VK_VERSION_1_1 +typedef struct VkPhysicalDeviceShaderFloat16Int8Features { + uint32_t sType; + void *pNext; + VkBool32 shaderFloat16; + VkBool32 shaderInt8; +} VkPhysicalDeviceShaderFloat16Int8FeaturesKHR; +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR 1000082000 + +// Provided by VK_VERSION_1_2 +typedef struct VkPhysicalDevice8BitStorageFeatures { + uint32_t sType; + void *pNext; + VkBool32 storageBuffer8BitAccess; + VkBool32 uniformAndStorageBuffer8BitAccess; + VkBool32 storagePushConstant8; +} VkPhysicalDevice8BitStorageFeaturesKHR; +#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR 1000177000 + +// Provided by VK_KHR_portability_enumeration +#define VK_KHR_portability_enumeration 1 +#define VK_KHR_PORTABILITY_ENUMERATION_SPEC_VERSION 1 +#define VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME "VK_KHR_portability_enumeration" + +} // extern "C" + +#endif // HALIDE_MINI_VULKAN_H diff --git a/src/runtime/runtime_api.cpp b/src/runtime/runtime_api.cpp index c4a5e39a52f5..51f5b7245343 100644 --- a/src/runtime/runtime_api.cpp +++ b/src/runtime/runtime_api.cpp @@ -9,6 +9,7 @@ #include "HalideRuntimeOpenCL.h" #include "HalideRuntimeOpenGLCompute.h" #include "HalideRuntimeQurt.h" +#include "HalideRuntimeVulkan.h" #include "HalideRuntimeWebGPU.h" #include "cpu_features.h" @@ -213,6 +214,11 @@ extern "C" __attribute__((used)) void *halide_runtime_api_functions[] = { 
(void *)&halide_d3d12compute_finalize_kernels, (void *)&halide_d3d12compute_release_context, (void *)&halide_d3d12compute_run, + (void *)&halide_vulkan_acquire_context, + (void *)&halide_vulkan_device_interface, + (void *)&halide_vulkan_initialize_kernels, + (void *)&halide_vulkan_release_context, + (void *)&halide_vulkan_run, (void *)&halide_webgpu_device_interface, (void *)&halide_webgpu_initialize_kernels, (void *)&halide_webgpu_finalize_kernels, diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp new file mode 100644 index 000000000000..a4765e4f56fa --- /dev/null +++ b/src/runtime/vulkan.cpp @@ -0,0 +1,1474 @@ +#include "HalideRuntimeVulkan.h" + +#include "device_buffer_utils.h" +#include "device_interface.h" +#include "runtime_internal.h" +#include "vulkan_context.h" +#include "vulkan_extensions.h" +#include "vulkan_internal.h" +#include "vulkan_memory.h" +#include "vulkan_resources.h" + +using namespace Halide::Runtime::Internal::Vulkan; + +// -------------------------------------------------------------------------- + +extern "C" { + +// -------------------------------------------------------------------------- + +// The default implementation of halide_acquire_vulkan_context uses +// the global pointers above, and serializes access with a spin lock. +// Overriding implementations of acquire/release must implement the +// following behavior: + +// - halide_acquire_vulkan_context should always store a valid +// instance/device/queue in the corresponding out parameters, +// or return an error code. +// - A call to halide_acquire_vulkan_context is followed by a matching +// call to halide_release_vulkan_context. halide_acquire_vulkan_context +// should block while a previous call (if any) has not yet been +// released via halide_release_vulkan_context. +WEAK int halide_vulkan_acquire_context(void *user_context, + halide_vulkan_memory_allocator **allocator, + VkInstance *instance, + VkDevice *device, + VkPhysicalDevice *physical_device, + VkCommandPool *command_pool, + VkQueue *queue, + uint32_t *queue_family_index, + bool create) { +#ifdef DEBUG_RUNTIME + halide_start_clock(user_context); +#endif + halide_debug_assert(user_context, instance != nullptr); + halide_debug_assert(user_context, device != nullptr); + halide_debug_assert(user_context, queue != nullptr); + halide_debug_assert(user_context, &thread_lock != nullptr); + while (__atomic_test_and_set(&thread_lock, __ATOMIC_ACQUIRE)) {} + + // If the context has not been initialized, initialize it now. 
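+    // When create is false (as halide_vulkan_device_release does below), nothing
+    // is constructed here: the cached handles are returned as-is and may still be
+    // nullptr, so callers passing create == false must tolerate null handles.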
+    if ((cached_instance == nullptr) && create) {
+        int error_code = vk_create_context(user_context,
+                                           reinterpret_cast<VulkanMemoryAllocator **>(&cached_allocator),
+                                           &cached_instance,
+                                           &cached_device,
+                                           &cached_physical_device,
+                                           &cached_command_pool,
+                                           &cached_queue,
+                                           &cached_queue_family_index);
+        if (error_code != halide_error_code_success) {
+            debug(user_context) << "halide_vulkan_acquire_context: FAILED to create context!\n";
+            __atomic_clear(&thread_lock, __ATOMIC_RELEASE);
+            return error_code;
+        }
+    }
+
+    *allocator = cached_allocator;
+    *instance = cached_instance;
+    *device = cached_device;
+    *physical_device = cached_physical_device;
+    *command_pool = cached_command_pool;
+    *queue = cached_queue;
+    *queue_family_index = cached_queue_family_index;
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_release_context(void *user_context, VkInstance instance, VkDevice device, VkQueue queue) {
+    __atomic_clear(&thread_lock, __ATOMIC_RELEASE);
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_device_free(void *user_context, halide_buffer_t *halide_buffer) {
+    debug(user_context)
+        << "halide_vulkan_device_free (user_context: " << user_context
+        << ", halide_buffer: " << halide_buffer << ")\n";
+
+    // halide_vulkan_device_free, at present, can be exposed to clients and they
+    // should be allowed to call halide_vulkan_device_free on any halide_buffer_t
+    // including ones that have never been used with a GPU.
+    if (halide_buffer->device == 0) {
+        return halide_error_code_success;
+    }
+
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    // get the allocated region for the device
+    MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(halide_buffer->device);
+    MemoryRegion *memory_region = ctx.allocator->owner_of(user_context, device_region);
+    if (ctx.allocator && memory_region && memory_region->handle) {
+        if (halide_can_reuse_device_allocations(user_context)) {
+            ctx.allocator->release(user_context, memory_region);
+        } else {
+            ctx.allocator->reclaim(user_context, memory_region);
+        }
+    }
+    halide_buffer->device = 0;
+    halide_buffer->device_interface->impl->release_module();
+    halide_buffer->device_interface = nullptr;
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_compute_capability(void *user_context, int *major, int *minor) {
+    debug(user_context) << " halide_vulkan_compute_capability (user_context: " << user_context << ")\n";
+    return vk_find_compute_capability(user_context, major, minor);
+}
+
+WEAK int halide_vulkan_initialize_kernels(void *user_context, void **state_ptr, const char *src, int size) {
+    debug(user_context)
+        << "halide_vulkan_init_kernels (user_context: " << user_context
+        << ", state_ptr: " << state_ptr
+        << ", program: " << (void *)src
+        << ", size: " << size << ")\n";
+
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    debug(user_context) << "halide_vulkan_initialize_kernels got compilation_cache mutex.\n";
+    VulkanCompilationCacheEntry *cache_entry = nullptr;
+    if (!compilation_cache.kernel_state_setup(user_context, state_ptr, ctx.device, cache_entry,
+                                              Halide::Runtime::Internal::Vulkan::vk_compile_shader_module,
+                                              user_context, ctx.allocator, src, size)) {
+        error(user_context) << "Vulkan: Failed to setup compilation cache!\n";
+        return halide_error_code_generic_error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+
+    return halide_error_code_success;
+}
+
+WEAK void halide_vulkan_finalize_kernels(void *user_context, void *state_ptr) {
+    debug(user_context)
+        << "halide_vulkan_finalize_kernels (user_context: " << user_context
+        << ", state_ptr: " << state_ptr << ")\n";
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    VulkanContext ctx(user_context);
+    if (ctx.error == halide_error_code_success) {
+        compilation_cache.release_hold(user_context, ctx.device, state_ptr);
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+}
+
+// Used to generate correct timings when tracing
+WEAK int halide_vulkan_device_sync(void *user_context, halide_buffer_t *) {
+    debug(user_context) << "halide_vulkan_device_sync (user_context: " << user_context << ")\n";
+
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    vkQueueWaitIdle(ctx.queue);
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_device_release(void *user_context) {
+    debug(user_context)
+        << "halide_vulkan_device_release (user_context: " << user_context << ")\n";
+
+    VulkanMemoryAllocator *allocator;
+    VkInstance instance;
+    VkDevice device;
+    VkCommandPool command_pool;
+    VkPhysicalDevice physical_device;
+    VkQueue queue;
+    uint32_t _throwaway;
+
+    int acquire_status = halide_vulkan_acquire_context(user_context,
+                                                       reinterpret_cast<halide_vulkan_memory_allocator **>(&allocator),
+                                                       &instance, &device, &physical_device, &command_pool, &queue, &_throwaway, false);
+
+    if ((acquire_status == halide_error_code_success) && (instance != nullptr)) {
+        vkQueueWaitIdle(queue);
+        if (command_pool == cached_command_pool) {
+            cached_command_pool = 0;
+        }
+        if (reinterpret_cast<halide_vulkan_memory_allocator *>(allocator) == cached_allocator) {
+            cached_allocator = nullptr;
+        }
+
+        vk_destroy_command_pool(user_context, allocator, command_pool);
+        vk_destroy_shader_modules(user_context, allocator);
+        vk_destroy_memory_allocator(user_context, allocator);
+
+        if (device == cached_device) {
+            cached_device = nullptr;
+            cached_physical_device = nullptr;
+            cached_queue = nullptr;
+            cached_queue_family_index = 0;
+        }
+        vkDestroyDevice(device, nullptr);
+
+        if (instance == cached_instance) {
+            cached_instance = nullptr;
+        }
+        vkDestroyInstance(instance, nullptr);
+        halide_vulkan_release_context(user_context, instance, device, queue);
+    }
+
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_device_malloc(void *user_context, halide_buffer_t *buf) {
+    debug(user_context)
+        << "halide_vulkan_device_malloc (user_context: " << user_context
+        << ", buf: " << buf << ")\n";
+
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+    size_t size = buf->size_in_bytes();
+    if (buf->device) {
+        MemoryRegion *device_region = (MemoryRegion *)(buf->device);
+        if (device_region->size >= size) {
+            debug(user_context) << "Vulkan: Requested allocation for existing device memory ... using existing buffer!\n";
+            return halide_error_code_success;
+        } else {
+            debug(user_context) << "Vulkan: Requested allocation of different size ... reallocating buffer!\n";
+            if (halide_can_reuse_device_allocations(user_context)) {
+                ctx.allocator->release(user_context, device_region);
+            } else {
+                ctx.allocator->reclaim(user_context, device_region);
+            }
+            buf->device = 0;
+        }
+    }
+
+    for (int i = 0; i < buf->dimensions; i++) {
+        halide_debug_assert(user_context, buf->dim[i].stride >= 0);
+    }
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context) << "    allocating buffer: ";
+    if (buf && buf->dim) {
+        debug(user_context) << "extents: " << buf->dim[0].extent << "x"
+                            << buf->dim[1].extent << "x" << buf->dim[2].extent << "x"
+                            << buf->dim[3].extent << " "
+                            << "strides: " << buf->dim[0].stride << "x"
+                            << buf->dim[1].stride << "x" << buf->dim[2].stride << "x"
+                            << buf->dim[3].stride << " ";
+    }
+    debug(user_context) << "type: " << buf->type << " "
+                        << "size_in_bytes: " << (uint64_t)size << " "
+                        << "(or " << (size * 1e-6f) << "MB)\n";
+
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    // request uncached device only memory
+    MemoryRequest request = {0};
+    request.size = size;
+    request.properties.usage = MemoryUsage::TransferSrcDst;
+    request.properties.caching = MemoryCaching::Uncached;
+    request.properties.visibility = MemoryVisibility::DeviceOnly;
+
+    // allocate a new region
+    MemoryRegion *device_region = ctx.allocator->reserve(user_context, request);
+    if ((device_region == nullptr) || (device_region->handle == nullptr)) {
+        error(user_context) << "Vulkan: Failed to allocate device memory!\n";
+        return halide_error_code_device_malloc_failed;
+    }
+
+    buf->device = (uint64_t)device_region;
+    buf->device_interface = &vulkan_device_interface;
+    buf->device_interface->impl->use_module();
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << "    allocated device region=" << (void *)device_region << "\n"
+        << "    containing device buffer=" << (void *)device_region->handle << "\n"
+        << "    for halide buffer " << buf << "\n";
+#endif
+
+    // retrieve the buffer from the region
+    VkBuffer *device_buffer = reinterpret_cast<VkBuffer *>(device_region->handle);
+    if (device_buffer == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve device buffer for device memory!\n";
+        return halide_error_code_internal_error;
+    }
+
+    int error_code = vk_clear_device_buffer(user_context, ctx.allocator, ctx.command_pool, ctx.queue, *device_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to clear device buffer!\n";
+        return error_code;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_copy_to_device(void *user_context, halide_buffer_t *halide_buffer) {
+    int error_code = halide_vulkan_device_malloc(user_context, halide_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to allocate device memory!\n";
+        return error_code;
+    }
+
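+    // Note: halide_vulkan_device_malloc is a no-op when the buffer already holds
+    // a device allocation of sufficient size, so the call above only guarantees
+    // that a destination region exists before the staged upload below.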
+    debug(user_context)
+        << "halide_vulkan_copy_to_device (user_context: " << user_context
+        << ", halide_buffer: " << halide_buffer << ")\n";
+
+    // Acquire the context so we can use the command queue.
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    if ((halide_buffer->host == nullptr) || (halide_buffer->device == 0)) {
+        error(user_context) << "Vulkan: Missing host/device pointers for halide buffer!\n";
+        return halide_error_code_internal_error;
+    }
+    device_copy copy_helper = make_host_to_device_copy(halide_buffer);
+
+    // We construct a staging buffer to copy into from host memory. Then,
+    // we use vkCmdCopyBuffer() to copy from the staging buffer into
+    // the actual device memory.
+    MemoryRequest request = {0};
+    request.size = halide_buffer->size_in_bytes();
+    request.properties.usage = MemoryUsage::TransferSrc;
+    request.properties.caching = MemoryCaching::UncachedCoherent;
+    request.properties.visibility = MemoryVisibility::HostToDevice;
+
+    // allocate a new region
+    MemoryRegion *staging_region = ctx.allocator->reserve(user_context, request);
+    if ((staging_region == nullptr) || (staging_region->handle == nullptr)) {
+        error(user_context) << "Vulkan: Failed to allocate device memory!\n";
+        return halide_error_code_device_malloc_failed;
+    }
+
+    // map the region to a host ptr
+    uint8_t *stage_host_ptr = (uint8_t *)ctx.allocator->map(user_context, staging_region);
+    if (stage_host_ptr == nullptr) {
+        error(user_context) << "Vulkan: Failed to map host pointer to device memory!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // copy to the (host-visible/coherent) staging buffer
+    copy_helper.dst = (uint64_t)(stage_host_ptr);
+    copy_memory(copy_helper, user_context);
+
+    // retrieve the buffer from the region
+    VkBuffer *staging_buffer = reinterpret_cast<VkBuffer *>(staging_region->handle);
+    if (staging_buffer == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve staging buffer for device memory!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // unmap the pointer
+    error_code = ctx.allocator->unmap(user_context, staging_region);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to unmap host pointer to device memory!\n";
+        return error_code;
+    }
+
+    // get the allocated region for the device
+    MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(halide_buffer->device);
+    if (device_region == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve device region for buffer!\n";
+        return halide_error_code_internal_error;
+    }
+
+    MemoryRegion *memory_region = ctx.allocator->owner_of(user_context, device_region);
+    if (memory_region == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve memory region for device!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // retrieve the buffer from the region
+    VkBuffer *device_buffer = reinterpret_cast<VkBuffer *>(memory_region->handle);
+    if (device_buffer == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve buffer for device memory!\n";
+        return halide_error_code_internal_error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << "    copying into device region=" << (void *)device_region << "\n"
+        << "    containing device buffer=" << (void *)device_buffer << "\n"
+        << "    from halide buffer=" << halide_buffer << "\n";
+#endif
+
+    // create a command buffer
+    VkCommandBuffer command_buffer;
+    error_code = vk_create_command_buffer(user_context, ctx.allocator, ctx.command_pool, &command_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to create command buffer!\n";
+        return error_code;
+    }
+
+    // begin the command buffer
+    VkCommandBufferBeginInfo command_buffer_begin_info =
+        {
+            VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,  // struct type
+            nullptr,                                      // pointer to struct extending this
+            VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,  // flags
+            nullptr                                       // pointer to parent command buffer
+        };
+
+    VkResult result = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkBeginCommandBuffer returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_device_buffer_copy_failed;
+    }
+
+    // define the src and dst config
+    bool from_host = true;
+    bool to_host = false;
+    copy_helper.src = (uint64_t)(staging_buffer);
+    copy_helper.dst = (uint64_t)(device_buffer);
+    uint64_t src_offset = copy_helper.src_begin;
+    uint64_t dst_offset = device_region->range.head_offset;
+
+    // enqueue the copy operation, using the allocated buffers
+    error_code = vk_do_multidimensional_copy(user_context, command_buffer, copy_helper,
+                                             src_offset, dst_offset,
+                                             halide_buffer->dimensions,
+                                             from_host, to_host);
+
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: vk_do_multidimensional_copy failed!\n";
+        return error_code;
+    }
+
+    // end the command buffer
+    result = vkEndCommandBuffer(command_buffer);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkEndCommandBuffer returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_device_buffer_copy_failed;
+    }
+
+    //// submit the command buffer to our command queue
+    VkSubmitInfo submit_info =
+        {
+            VK_STRUCTURE_TYPE_SUBMIT_INFO,  // struct type
+            nullptr,                        // pointer to struct extending this
+            0,                              // wait semaphore count
+            nullptr,                        // semaphores
+            nullptr,                        // pipeline stages where semaphore waits occur
+            1,                              // how many command buffers to execute
+            &command_buffer,                // the command buffers
+            0,                              // number of semaphores to signal
+            nullptr                         // the semaphores to signal
+        };
+
+    result = vkQueueSubmit(ctx.queue, 1, &submit_info, 0);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkQueueSubmit returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_device_buffer_copy_failed;
+    }
+
+    //// wait until the queue is done with the command buffer
+    result = vkQueueWaitIdle(ctx.queue);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkQueueWaitIdle returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_device_buffer_copy_failed;
+    }
+
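+    // The staging buffer is transient: when allocation reuse is enabled it is
+    // released back to the allocator's pool, otherwise it is reclaimed and
+    // freed immediately.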
+    //// reclaim the staging buffer
+    if (halide_can_reuse_device_allocations(user_context)) {
+        ctx.allocator->release(user_context, staging_region);
+    } else {
+        ctx.allocator->reclaim(user_context, staging_region);
+    }
+
+    error_code = vk_destroy_command_buffer(user_context, ctx.allocator, ctx.command_pool, command_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to destroy command buffer!\n";
+        return error_code;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_copy_to_host(void *user_context, halide_buffer_t *halide_buffer) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << "halide_vulkan_copy_to_host (user_context: " << user_context
+        << ", halide_buffer: " << halide_buffer << ")\n";
+#endif
+    if (halide_buffer == nullptr) {
+        error(user_context) << "Vulkan: Failed to copy buffer to host ... invalid halide buffer!\n";
+        return halide_error_code_copy_to_host_failed;
+    }
+
+    // Acquire the context so we can use the command queue. This also avoids multiple
+    // redundant calls to enqueue a download when multiple threads are trying to copy
+    // the same buffer.
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+    if ((halide_buffer->host == nullptr) || (halide_buffer->device == 0)) {
+        error(user_context) << "Vulkan: Missing host/device pointers for halide buffer!\n";
+        return halide_error_code_internal_error;
+    }
+
+    device_copy copy_helper = make_device_to_host_copy(halide_buffer);
+
+    // This is the inverse of copy_to_device: we create a staging buffer, copy into
+    // it, map it so the host can see it, then copy into the host buffer
+    MemoryRequest request = {0};
+    request.size = halide_buffer->size_in_bytes();
+    request.properties.usage = MemoryUsage::TransferDst;
+    request.properties.caching = MemoryCaching::UncachedCoherent;
+    request.properties.visibility = MemoryVisibility::DeviceToHost;
+
+    // allocate a new region for staging the transfer
+    MemoryRegion *staging_region = ctx.allocator->reserve(user_context, request);
+    if ((staging_region == nullptr) || (staging_region->handle == nullptr)) {
+        error(user_context) << "Vulkan: Failed to allocate device memory!\n";
+        return halide_error_code_device_malloc_failed;
+    }
+
+    // retrieve the buffer from the region
+    VkBuffer *staging_buffer = reinterpret_cast<VkBuffer *>(staging_region->handle);
+    if (staging_buffer == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve staging buffer for device memory!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // get the allocated region for the device
+    MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(halide_buffer->device);
+    if (device_region == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve device region for buffer!\n";
+        return halide_error_code_internal_error;
+    }
+
+    MemoryRegion *memory_region = ctx.allocator->owner_of(user_context, device_region);
+    if (memory_region == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve memory region for buffer!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // retrieve the buffer from the region
+    VkBuffer *device_buffer = reinterpret_cast<VkBuffer *>(memory_region->handle);
+    if (device_buffer == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve buffer for device memory!\n";
+        return halide_error_code_internal_error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << "    copying from device region=" << (void *)device_region << "\n"
+        << "    containing device buffer=" << (void *)device_buffer << "\n"
+        << "    into halide buffer=" << halide_buffer << "\n";
+#endif
+
+    // create a command buffer
+    VkCommandBuffer command_buffer;
+    int error_code = vk_create_command_buffer(user_context, ctx.allocator, ctx.command_pool, &command_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to create command buffer!\n";
+        return error_code;
+    }
+
+    // begin the command buffer
+    VkCommandBufferBeginInfo command_buffer_begin_info =
+        {
+            VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,  // struct type
+            nullptr,                                      // pointer to struct extending this
+            VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,  // flags
+            nullptr                                       // pointer to parent command buffer
+        };
+
+    VkResult result = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkBeginCommandBuffer returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_device_buffer_copy_failed;
+    }
+
+    // define the src and dst config
+    bool from_host = false;
+    bool to_host = true;
+    uint64_t copy_dst = copy_helper.dst;
+    copy_helper.src = (uint64_t)(device_buffer);
+    copy_helper.dst = (uint64_t)(staging_buffer);
+    uint64_t src_offset = copy_helper.src_begin + device_region->range.head_offset;
+    uint64_t dst_offset = 0;
+
+    // enqueue the copy operation, using the allocated buffers
+    error_code = vk_do_multidimensional_copy(user_context, command_buffer, copy_helper,
+                                             src_offset, dst_offset,
+                                             halide_buffer->dimensions,
+                                             from_host, to_host);
+
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: vk_do_multidimensional_copy failed!\n";
+        return error_code;
+    }
+
+    // end the command buffer
+    result = vkEndCommandBuffer(command_buffer);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkEndCommandBuffer returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_copy_to_host_failed;
+    }
+
+    //// submit the command buffer to our command queue
+    VkSubmitInfo submit_info =
+        {
+            VK_STRUCTURE_TYPE_SUBMIT_INFO,  // struct type
+            nullptr,                        // pointer to struct extending this
+            0,                              // wait semaphore count
+            nullptr,                        // semaphores
+            nullptr,                        // pipeline stages where semaphore waits occur
+            1,                              // how many command buffers to execute
+            &command_buffer,                // the command buffers
+            0,                              // number of semaphores to signal
+            nullptr                         // the semaphores to signal
+        };
+
+    result = vkQueueSubmit(ctx.queue, 1, &submit_info, 0);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkQueueSubmit returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_copy_to_host_failed;
+    }
+
+    //// wait until the queue is done with the command buffer
+    result = vkQueueWaitIdle(ctx.queue);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkQueueWaitIdle returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_copy_to_host_failed;
+    }
+
+    // map the staging region to a host ptr
+    uint8_t *stage_host_ptr = (uint8_t *)ctx.allocator->map(user_context, staging_region);
+    if (stage_host_ptr == nullptr) {
+        error(user_context) << "Vulkan: Failed to map host pointer to device memory!\n";
+        return halide_error_code_copy_to_host_failed;
+    }
+
+    // copy from the (host-visible/coherent) staging buffer back to the host
+    copy_helper.dst = copy_dst;
+    copy_helper.src = (uint64_t)(stage_host_ptr);
+    copy_memory(copy_helper, user_context);
+
+    // unmap the pointer and reclaim the staging region
+    error_code = ctx.allocator->unmap(user_context, staging_region);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to unmap staging region!\n";
+        return error_code;
+    }
+
+    if (halide_can_reuse_device_allocations(user_context)) {
+        ctx.allocator->release(user_context, staging_region);
+    } else {
+        ctx.allocator->reclaim(user_context, staging_region);
+    }
+    vk_destroy_command_buffer(user_context, ctx.allocator, ctx.command_pool, command_buffer);
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_buffer_copy(void *user_context, struct halide_buffer_t *src,
+                                   const struct halide_device_interface_t *dst_device_interface,
+                                   struct halide_buffer_t *dst) {
+    if (dst->dimensions > MAX_COPY_DIMS) {
+        error(user_context) << "Vulkan: Buffer has too many dimensions to copy to/from GPU\n";
+        return halide_error_code_buffer_extents_too_large;
+    }
+
+    // We only handle copies to Vulkan buffers or to host
+    if ((dst_device_interface != nullptr) && (dst_device_interface != &vulkan_device_interface)) {
+        error(user_context) << "Vulkan: Unable to copy buffer ... only Vulkan allocated device buffers copying to/from host are supported!\n";
+        return halide_error_code_device_buffer_copy_failed;
+    }
+
+    if ((src->device_dirty() || src->host == nullptr) && (src->device_interface != &vulkan_device_interface)) {
+        // This is handled at the higher level.
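+        // Returning "incompatible device interface" here is deliberate: it lets
+        // the generic halide_buffer_copy logic fall back and route the copy
+        // through the source interface's own copy-to-host path instead.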
+        return halide_error_code_incompatible_device_interface;
+    }
+
+    bool from_host = (src->device_interface != &vulkan_device_interface) ||
+                     (src->device == 0) ||
+                     (src->host_dirty() && src->host != nullptr);
+    bool to_host = !dst_device_interface;
+
+    if (!(from_host || src->device)) {
+        error(user_context) << "Vulkan: halide_vulkan_buffer_copy: invalid copy source\n";
+        return halide_error_code_device_buffer_copy_failed;
+    }
+    if (!(to_host || dst->device)) {
+        error(user_context) << "Vulkan: halide_vulkan_buffer_copy: invalid copy destination\n";
+        return halide_error_code_device_buffer_copy_failed;
+    }
+
+    device_copy copy_helper = make_buffer_copy(src, from_host, dst, to_host);
+
+    int error_code = halide_error_code_success;
+    {
+        VulkanContext ctx(user_context);
+        if (ctx.error != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to acquire context!\n";
+            return ctx.error;
+        }
+
+        debug(user_context)
+            << "halide_vulkan_buffer_copy (user_context: " << user_context
+            << ", src: " << src << ", dst: " << dst << ")\n";
+
+#ifdef DEBUG_RUNTIME
+        uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+        MemoryRegion *staging_region = nullptr;
+        MemoryRegion *src_buffer_region = nullptr;
+        MemoryRegion *dst_buffer_region = nullptr;
+
+        //// wait until the queue is done with the command buffer
+        VkResult wait_result = vkQueueWaitIdle(ctx.queue);
+        if (wait_result != VK_SUCCESS) {
+            error(user_context) << "Vulkan: vkQueueWaitIdle returned " << vk_get_error_name(wait_result) << "\n";
+            if (to_host) {
+                return halide_error_code_copy_to_host_failed;
+            } else {
+                return halide_error_code_copy_to_device_failed;
+            }
+        }
+
+        if (!from_host && !to_host) {
+            // Device only case
+            debug(user_context) << " buffer copy from: device to: device\n";
+
+            // get the buffer regions for the device
+            src_buffer_region = reinterpret_cast<MemoryRegion *>(src->device);
+            dst_buffer_region = reinterpret_cast<MemoryRegion *>(dst->device);
+
+        } else if (!from_host && to_host) {
+            // Device to Host
+            debug(user_context) << " buffer copy from: device to: host\n";
+
+            // Need to make sure all reads and writes to/from source are complete.
+            MemoryRequest request = {0};
+            request.size = src->size_in_bytes();
+            request.properties.usage = MemoryUsage::TransferSrc;
+            request.properties.caching = MemoryCaching::UncachedCoherent;
+            request.properties.visibility = MemoryVisibility::DeviceToHost;
+
+            // allocate a new region
+            staging_region = ctx.allocator->reserve(user_context, request);
+            if ((staging_region == nullptr) || (staging_region->handle == nullptr)) {
+                error(user_context) << "Vulkan: Failed to allocate device memory!\n";
+                return halide_error_code_device_malloc_failed;
+            }
+
+            // use the staging region and buffer from the copy destination
+            src_buffer_region = reinterpret_cast<MemoryRegion *>(src->device);
+            dst_buffer_region = staging_region;
+
+        } else if (from_host && !to_host) {
+            // Host to Device
+            debug(user_context) << " buffer copy from: host to: device\n";
+
+            // Need to make sure all reads and writes to/from destination are complete.
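+            // Host -> Device takes the same staged path as halide_vulkan_copy_to_device:
+            // fill a host-visible staging buffer via map/copy/unmap, then enqueue a
+            // buffer-to-buffer transfer into the device-local destination below.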
+            MemoryRequest request = {0};
+            request.size = src->size_in_bytes();
+            request.properties.usage = MemoryUsage::TransferSrc;
+            request.properties.caching = MemoryCaching::UncachedCoherent;
+            request.properties.visibility = MemoryVisibility::HostToDevice;
+
+            // allocate a new region
+            staging_region = ctx.allocator->reserve(user_context, request);
+            if ((staging_region == nullptr) || (staging_region->handle == nullptr)) {
+                error(user_context) << "Vulkan: Failed to allocate device memory!\n";
+                return halide_error_code_device_malloc_failed;
+            }
+
+            // map the region to a host ptr
+            uint8_t *stage_host_ptr = (uint8_t *)ctx.allocator->map(user_context, staging_region);
+            if (stage_host_ptr == nullptr) {
+                error(user_context) << "Vulkan: Failed to map host pointer to device memory!\n";
+                return halide_error_code_copy_to_device_failed;
+            }
+
+            // copy to the (host-visible/coherent) staging buffer, then restore the dst pointer
+            uint64_t copy_dst_ptr = copy_helper.dst;
+            copy_helper.dst = (uint64_t)(stage_host_ptr);
+            copy_memory(copy_helper, user_context);
+            copy_helper.dst = copy_dst_ptr;
+
+            // unmap the pointer
+            error_code = ctx.allocator->unmap(user_context, staging_region);
+            if (error_code != halide_error_code_success) {
+                error(user_context) << "Vulkan: Failed to unmap staging region!\n";
+                return halide_error_code_copy_to_device_failed;
+            }
+
+            // use the staging region and buffer from the copy source
+            src_buffer_region = staging_region;
+            dst_buffer_region = reinterpret_cast<MemoryRegion *>(dst->device);
+
+        } else if (from_host && to_host) {
+            debug(user_context) << " buffer copy from: host to: host\n";
+            copy_memory(copy_helper, user_context);
+            return halide_error_code_success;
+        }
+
+        if (src_buffer_region == nullptr) {
+            error(user_context) << "Vulkan: Failed to retrieve source buffer for device memory!\n";
+            return halide_error_code_internal_error;
+        }
+
+        if (dst_buffer_region == nullptr) {
+            error(user_context) << "Vulkan: Failed to retrieve destination buffer for device memory!\n";
+            return halide_error_code_internal_error;
+        }
+
+        // get the owning memory region (that holds the allocation)
+        MemoryRegion *src_memory_region = ctx.allocator->owner_of(user_context, src_buffer_region);
+        MemoryRegion *dst_memory_region = ctx.allocator->owner_of(user_context, dst_buffer_region);
+
+        // retrieve the buffers from the owning allocation region
+        VkBuffer *src_device_buffer = reinterpret_cast<VkBuffer *>(src_memory_region->handle);
+        VkBuffer *dst_device_buffer = reinterpret_cast<VkBuffer *>(dst_memory_region->handle);
+
+        // create a command buffer
+        VkCommandBuffer command_buffer;
+        error_code = vk_create_command_buffer(user_context, ctx.allocator, ctx.command_pool, &command_buffer);
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to create command buffer!\n";
+            if (to_host) {
+                return halide_error_code_copy_to_host_failed;
+            } else {
+                return halide_error_code_copy_to_device_failed;
+            }
+        }
+
+        // begin the command buffer
+        VkCommandBufferBeginInfo command_buffer_begin_info =
+            {
+                VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,  // struct type
+                nullptr,                                      // pointer to struct extending this
+                VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,  // flags
+                nullptr                                       // pointer to parent command buffer
+            };
+
+        VkResult result = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
+        if (result != VK_SUCCESS) {
+            error(user_context) << "Vulkan: vkBeginCommandBuffer returned " << vk_get_error_name(result) << "\n";
+            if (to_host) {
+                return halide_error_code_copy_to_host_failed;
+            } else {
+                return halide_error_code_copy_to_device_failed;
+            }
+        }
+
+        // define the src and dst config
+        uint64_t copy_dst = copy_helper.dst;
+        copy_helper.src = (uint64_t)(src_device_buffer);
+        copy_helper.dst = (uint64_t)(dst_device_buffer);
+        uint64_t src_offset = copy_helper.src_begin + src_buffer_region->range.head_offset;
+        uint64_t dst_offset = dst_buffer_region->range.head_offset;
+        if (!from_host && !to_host) {
+            src_offset = src_buffer_region->range.head_offset;
+            dst_offset = dst_buffer_region->range.head_offset;
+        }
+
+        debug(user_context) << " src region=" << (void *)src_memory_region << " buffer=" << (void *)src_device_buffer << " crop_offset=" << (uint64_t)src_buffer_region->range.head_offset << " copy_offset=" << src_offset << "\n";
+        debug(user_context) << " dst region=" << (void *)dst_memory_region << " buffer=" << (void *)dst_device_buffer << " crop_offset=" << (uint64_t)dst_buffer_region->range.head_offset << " copy_offset=" << dst_offset << "\n";
+
+        // enqueue the copy operation, using the allocated buffers
+        error_code = vk_do_multidimensional_copy(user_context, command_buffer, copy_helper,
+                                                 src_offset, dst_offset,
+                                                 src->dimensions,
+                                                 from_host, to_host);
+
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: vk_do_multidimensional_copy failed!\n";
+            return error_code;
+        }
+
+        // end the command buffer
+        result = vkEndCommandBuffer(command_buffer);
+        if (result != VK_SUCCESS) {
+            error(user_context) << "Vulkan: vkEndCommandBuffer returned " << vk_get_error_name(result) << "\n";
+            if (to_host) {
+                return halide_error_code_copy_to_host_failed;
+            } else {
+                return halide_error_code_copy_to_device_failed;
+            }
+        }
+
+        //// submit the command buffer to our command queue
+        VkSubmitInfo submit_info =
+            {
+                VK_STRUCTURE_TYPE_SUBMIT_INFO,  // struct type
+                nullptr,                        // pointer to struct extending this
+                0,                              // wait semaphore count
+                nullptr,                        // semaphores
+                nullptr,                        // pipeline stages where semaphore waits occur
+                1,                              // how many command buffers to execute
+                &command_buffer,                // the command buffers
+                0,                              // number of semaphores to signal
+                nullptr                         // the semaphores to signal
+            };
+
+        result = vkQueueSubmit(ctx.queue, 1, &submit_info, 0);
+        if (result != VK_SUCCESS) {
+            error(user_context) << "Vulkan: vkQueueSubmit returned " << vk_get_error_name(result) << "\n";
+            if (to_host) {
+                return halide_error_code_copy_to_host_failed;
+            } else {
+                return halide_error_code_copy_to_device_failed;
+            }
+        }
+
+        //// wait until the queue is done with the command buffer
+        result = vkQueueWaitIdle(ctx.queue);
+        if (result != VK_SUCCESS) {
+            error(user_context) << "Vulkan: vkQueueWaitIdle returned " << vk_get_error_name(result) << "\n";
+            if (to_host) {
+                return halide_error_code_copy_to_host_failed;
+            } else {
+                return halide_error_code_copy_to_device_failed;
+            }
+        }
+
+        if (!from_host && to_host) {
+            // map the staging region to a host ptr
+            uint8_t *stage_host_ptr = (uint8_t *)ctx.allocator->map(user_context, staging_region);
+            if (stage_host_ptr == nullptr) {
+                error(user_context) << "Vulkan: Failed to map host pointer to device memory!\n";
+                return halide_error_code_internal_error;
+            }
+
+            // copy from the (host-visible/coherent) staging buffer back to the host
+            copy_helper.dst = copy_dst;
+            copy_helper.src = (uint64_t)(stage_host_ptr);
+            copy_memory(copy_helper, user_context);
+
+            // unmap the pointer and reclaim the staging region
+            error_code = ctx.allocator->unmap(user_context, staging_region);
+            if (error_code != halide_error_code_success) {
+                error(user_context) << "Vulkan: Failed to unmap pointer for staging region!\n";
+                return error_code;
+            }
+        }
+
+        if (staging_region) {
+            if (halide_can_reuse_device_allocations(user_context)) {
+                error_code = ctx.allocator->release(user_context, staging_region);
+            } else {
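+                // Allocation reuse is disabled, so hand the staging region straight
+                // back to the allocator rather than caching it for a later copy.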
+                error_code = ctx.allocator->reclaim(user_context, staging_region);
+            }
+        }
+
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to release staging region allocation!\n";
+            return error_code;
+        }
+
+        error_code = vk_destroy_command_buffer(user_context, ctx.allocator, ctx.command_pool, command_buffer);
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to destroy command buffer!\n";
+            return error_code;
+        }
+
+#ifdef DEBUG_RUNTIME
+        uint64_t t_after = halide_current_time_ns(user_context);
+        debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+    }
+
+    return error_code;
+}
+
+WEAK int halide_vulkan_device_crop(void *user_context,
+                                   const struct halide_buffer_t *src,
+                                   struct halide_buffer_t *dst) {
+    const int64_t offset = calc_device_crop_byte_offset(src, dst);
+    return vk_device_crop_from_offset(user_context, src, offset, dst);
+}
+
+WEAK int halide_vulkan_device_slice(void *user_context,
+                                    const struct halide_buffer_t *src,
+                                    int slice_dim, int slice_pos,
+                                    struct halide_buffer_t *dst) {
+    const int64_t offset = calc_device_slice_byte_offset(src, slice_dim, slice_pos);
+    return vk_device_crop_from_offset(user_context, src, offset, dst);
+}
+
+WEAK int halide_vulkan_device_release_crop(void *user_context,
+                                           struct halide_buffer_t *halide_buffer) {
+
+    debug(user_context)
+        << "Vulkan: halide_vulkan_device_release_crop (user_context: " << user_context
+        << ", halide_buffer: " << halide_buffer << ")\n";
+
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    // get the allocated region for the device
+    MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(halide_buffer->device);
+    if (device_region == nullptr) {
+        error(user_context) << "Vulkan: Failed to retrieve device region for buffer!\n";
+        return halide_error_code_internal_error;
+    }
+
+    int error_code = ctx.allocator->destroy_crop(user_context, device_region);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to destroy crop for device region!\n";
+        return error_code;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+
+    return halide_error_code_success;
+}
+
+WEAK int halide_vulkan_run(void *user_context,
+                           void *state_ptr,
+                           const char *entry_name,
+                           int blocksX, int blocksY, int blocksZ,
+                           int threadsX, int threadsY, int threadsZ,
+                           int shared_mem_bytes,
+                           size_t arg_sizes[],
+                           void *args[],
+                           int8_t arg_is_buffer[]) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << "halide_vulkan_run (user_context: " << user_context << ", "
+        << "entry: " << entry_name << ", "
+        << "blocks: " << blocksX << "x" << blocksY << "x" << blocksZ << ", "
+        << "threads: " << threadsX << "x" << threadsY << "x" << threadsZ << ", "
+        << "shmem: " << shared_mem_bytes << ")\n";
+#endif
+
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    // Running a Vulkan pipeline requires a large number of steps
+    // and boilerplate. We save pipeline specific objects alongside the
+    // shader module in the compilation cache to avoid re-creating these
+    // if used more than once.
+    //
+    // 1. Lookup the shader module cache entry in the compilation cache
+    //    --- If shader module doesn't exist yet, then lookup invokes compile
+    //    1a. Locate the correct entry point for the kernel (code modules may contain multiple entry points)
+    // 2. If the rest of the cache entry is uninitialized, then create new objects:
+    //    2a. Create a descriptor set layout
+    //    2b. Create a pipeline layout
+    //    2c. Create a compute pipeline
+    //    --- Apply specializations to pipeline for shared memory or workgroup sizes
+    //    2d. Create a descriptor set
+    //    --- The above can be cached between invocations ---
+    // 3. Set bindings for buffers and args in the descriptor set
+    //    3a. Create the buffer for the scalar params
+    //    3b. Copy args into uniform buffer
+    //    3c. Update buffer bindings for descriptor set
+    // 4. Create a command buffer from the command pool
+    // 5. Fill the command buffer with a dispatch call
+    //    5a. Bind the compute pipeline
+    //    5b. Bind the descriptor set
+    //    5c. Add a dispatch to the command buffer
+    //    5d. End the command buffer
+    // 6. Submit the command buffer to our command queue
+    // --- The following isn't the most efficient, but it's what we do in Metal etc. ---
+    // 7. Wait until the queue is done with the command buffer
+    // 8. Cleanup all temporary objects
+
+    // 1. Get the shader module cache entry
+    VulkanCompilationCacheEntry *cache_entry = nullptr;
+    bool found = compilation_cache.lookup(ctx.device, state_ptr, cache_entry);
+    if (!found || (cache_entry == nullptr)) {
+        error(user_context) << "Vulkan: Failed to locate shader module! Unable to proceed!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // 1a. Locate the correct entry point from the cache
+    bool found_entry_point = false;
+    uint32_t entry_point_index = 0;
+    for (uint32_t n = 0; (n < cache_entry->shader_count) && !found_entry_point; ++n) {
+        if (strcmp(cache_entry->shader_bindings[n].entry_point_name, entry_name) == 0) {
+            entry_point_index = n;
+            found_entry_point = true;
+        }
+    }
+    if (!found_entry_point || (entry_point_index >= cache_entry->shader_count)) {
+        error(user_context) << "Vulkan: Failed to locate shader entry point! Unable to proceed!\n";
+        return halide_error_code_internal_error;
+    }
+    debug(user_context) << " found entry point ["
+                        << (entry_point_index + 1) << " of " << cache_entry->shader_count
+                        << "] '" << entry_name << "'\n";
+
+    // 2. Create objects for execution
+    if (cache_entry->descriptor_set_layouts == nullptr) {
+        error(user_context) << "Vulkan: Missing descriptor set layouts! Unable to proceed!\n";
+        return halide_error_code_internal_error;
+    }
+
+    int error_code = halide_error_code_success;
+    if (cache_entry->pipeline_layout == 0) {
+
+        // 2a. Create all descriptor set layouts
+        for (uint32_t n = 0; n < cache_entry->shader_count; ++n) {
+            if (((void *)cache_entry->descriptor_set_layouts[n]) == nullptr) {
+                uint32_t uniform_buffer_count = cache_entry->shader_bindings[n].uniform_buffer_count;
+                uint32_t storage_buffer_count = cache_entry->shader_bindings[n].storage_buffer_count;
+                debug(user_context) << " creating descriptor set layout [" << n << "] " << cache_entry->shader_bindings[n].entry_point_name << "\n";
+                error_code = vk_create_descriptor_set_layout(user_context, ctx.allocator, uniform_buffer_count, storage_buffer_count, &(cache_entry->descriptor_set_layouts[n]));
+                if (error_code != halide_error_code_success) {
+                    error(user_context) << "Vulkan: Failed to create descriptor set layout!\n";
+                    return error_code;
+                }
+            }
+        }
+
+        // 2b. Create the pipeline layout
+        error_code = vk_create_pipeline_layout(user_context, ctx.allocator, cache_entry->shader_count, cache_entry->descriptor_set_layouts, &(cache_entry->pipeline_layout));
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
+            return error_code;
+        }
+    }
+
+    VulkanDispatchData dispatch_data = {};
+    dispatch_data.shared_mem_bytes = shared_mem_bytes;
+    dispatch_data.global_size[0] = blocksX;
+    dispatch_data.global_size[1] = blocksY;
+    dispatch_data.global_size[2] = blocksZ;
+    dispatch_data.local_size[0] = threadsX;
+    dispatch_data.local_size[1] = threadsY;
+    dispatch_data.local_size[2] = threadsZ;
+
+    VulkanShaderBinding *entry_point_binding = (cache_entry->shader_bindings + entry_point_index);
+
+    // 2c. Setup the compute pipeline (e.g. override any specializations for shared mem or workgroup size)
+    error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, cache_entry->shader_module, cache_entry->pipeline_layout, &(entry_point_binding->compute_pipeline));
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
+        return error_code;
+    }
+
+    // 2d. Create a descriptor set
+    if (entry_point_binding->descriptor_set == 0) {
+
+        // Construct a descriptor pool
+        //
+        // NOTE: while this could be re-used across multiple pipelines, we only know the storage requirements of this kernel's
+        //       inputs and outputs ... so create a pool specific to the number of buffers known at this time
+
+        uint32_t uniform_buffer_count = entry_point_binding->uniform_buffer_count;
+        uint32_t storage_buffer_count = entry_point_binding->storage_buffer_count;
+        error_code = vk_create_descriptor_pool(user_context, ctx.allocator, uniform_buffer_count, storage_buffer_count, &(entry_point_binding->descriptor_pool));
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to create descriptor pool!\n";
+            return error_code;
+        }
+
+        // Create the descriptor set
+        error_code = vk_create_descriptor_set(user_context, ctx.allocator, cache_entry->descriptor_set_layouts[entry_point_index], entry_point_binding->descriptor_pool, &(entry_point_binding->descriptor_set));
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to create descriptor set!\n";
+            return error_code;
+        }
+    }
+
+    // 3a. Create a buffer for the scalar parameters
+    if ((entry_point_binding->args_region == nullptr) && entry_point_binding->uniform_buffer_count) {
+        size_t scalar_buffer_size = vk_estimate_scalar_uniform_buffer_size(user_context, arg_sizes, args, arg_is_buffer);
+        if (scalar_buffer_size > 0) {
+            entry_point_binding->args_region = vk_create_scalar_uniform_buffer(user_context, ctx.allocator, scalar_buffer_size);
+            if (entry_point_binding->args_region == nullptr) {
+                error(user_context) << "Vulkan: Failed to create scalar uniform buffer!\n";
+                return halide_error_code_out_of_memory;
+            }
+        }
+    }
+
+    // 3b. Update the uniform buffer with the scalar parameters
+    VkBuffer *args_buffer = nullptr;
+    if ((entry_point_binding->args_region != nullptr) && entry_point_binding->uniform_buffer_count) {
+        error_code = vk_update_scalar_uniform_buffer(user_context, ctx.allocator, entry_point_binding->args_region, arg_sizes, args, arg_is_buffer);
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to update scalar uniform buffer!\n";
+            return error_code;
+        }
+
+        args_buffer = reinterpret_cast<VkBuffer *>(entry_point_binding->args_region->handle);
+        if (args_buffer == nullptr) {
+            error(user_context) << "Vulkan: Failed to retrieve scalar args buffer for device memory!\n";
+            return halide_error_code_internal_error;
+        }
+    }
+
+    // 3c. Update the buffer bindings for the descriptor set
+    error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to update descriptor set!\n";
+        return error_code;
+    }
+
+    // 4. Create a command buffer from the command pool
+    VkCommandBuffer command_buffer;
+    error_code = vk_create_command_buffer(user_context, ctx.allocator, ctx.command_pool, &command_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to create command buffer!\n";
+        return error_code;
+    }
+
+    // 5. Fill the command buffer with a dispatch call
+    error_code = vk_fill_command_buffer_with_dispatch_call(user_context,
+                                                           ctx.device, command_buffer,
+                                                           entry_point_binding->compute_pipeline,
+                                                           cache_entry->pipeline_layout,
+                                                           entry_point_binding->descriptor_set,
+                                                           entry_point_index,
+                                                           blocksX, blocksY, blocksZ);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to fill command buffer with dispatch call!\n";
+        return error_code;
+    }
+
+    // 6. Submit the command buffer to our command queue
+    error_code = vk_submit_command_buffer(user_context, ctx.queue, command_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to submit command buffer!\n";
+        return error_code;
+    }
+
+    // 7. Wait until the queue is done with the command buffer
+    VkResult result = vkQueueWaitIdle(ctx.queue);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkQueueWaitIdle returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+
+    // 8.
Cleanup + error_code = vk_destroy_command_buffer(user_context, ctx.allocator, ctx.command_pool, command_buffer); + vkResetCommandPool(ctx.device, ctx.command_pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to destroy command buffer!\n"; + return error_code; + } + +#ifdef DEBUG_RUNTIME + debug(user_context) << "halide_vulkan_run: blocks_allocated=" + << (uint32_t)ctx.allocator->blocks_allocated() << " " + << "bytes_allocated_for_blocks=" << (uint32_t)ctx.allocator->bytes_allocated_for_blocks() << " " + << "regions_allocated=" << (uint32_t)ctx.allocator->regions_allocated() << " " + << "bytes_allocated_for_regions=" << (uint32_t)ctx.allocator->bytes_allocated_for_regions() << " " + << "\n"; + + uint64_t t_after = halide_current_time_ns(user_context); + debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n"; +#endif + return halide_error_code_success; +} + +WEAK int halide_vulkan_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) { + return halide_default_device_and_host_malloc(user_context, buf, &vulkan_device_interface); +} + +WEAK int halide_vulkan_device_and_host_free(void *user_context, struct halide_buffer_t *buf) { + return halide_default_device_and_host_free(user_context, buf, &vulkan_device_interface); +} + +WEAK int halide_vulkan_wrap_vk_buffer(void *user_context, struct halide_buffer_t *buf, uint64_t vk_buffer) { + halide_debug_assert(user_context, buf->device == 0); + if (buf->device != 0) { + error(user_context) << "Vulkan: Unable to wrap buffer ... invalid device pointer!\n"; + return halide_error_code_device_wrap_native_failed; + } + buf->device = vk_buffer; + buf->device_interface = &vulkan_device_interface; + buf->device_interface->impl->use_module(); + return halide_error_code_success; +} + +WEAK int halide_vulkan_detach_vk_buffer(void *user_context, halide_buffer_t *buf) { + if (buf->device == 0) { + return halide_error_code_success; + } + if (buf->device_interface != &vulkan_device_interface) { + error(user_context) << "Vulkan: Unable to detach buffer ... 
invalid device interface!\n"; + return halide_error_code_incompatible_device_interface; + } + buf->device = 0; + buf->device_interface->impl->release_module(); + buf->device_interface = nullptr; + return halide_error_code_success; +} + +WEAK uintptr_t halide_vulkan_get_vk_buffer(void *user_context, halide_buffer_t *buf) { + if (buf->device == 0) { + return 0; + } + halide_debug_assert(user_context, buf->device_interface == &vulkan_device_interface); + return (uintptr_t)buf->device; +} + +WEAK const struct halide_device_interface_t *halide_vulkan_device_interface() { + return &vulkan_device_interface; +} + +WEAK halide_device_allocation_pool vulkan_allocation_pool; + +WEAK int halide_vulkan_release_unused_device_allocations(void *user_context) { + debug(user_context) + << "halide_vulkan_release_unused_device_allocations (user_context: " << user_context + << ")\n"; + + VulkanContext ctx(user_context); + if (ctx.error != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to acquire context!\n"; + return ctx.error; + } + + // collect all unused allocations + ctx.allocator->collect(user_context); + return halide_error_code_success; +} + +namespace { + +WEAK __attribute__((constructor)) void register_vulkan_allocation_pool() { + vulkan_allocation_pool.release_unused = &halide_vulkan_release_unused_device_allocations; + halide_register_device_allocation_pool(&vulkan_allocation_pool); +} + +WEAK __attribute__((destructor)) void halide_vulkan_cleanup() { + halide_vulkan_device_release(nullptr); +} + +// -------------------------------------------------------------------------- + +} // namespace + +// -------------------------------------------------------------------------- + +} // extern "C" linkage + +// -------------------------------------------------------------------------- + +namespace Halide { +namespace Runtime { +namespace Internal { +namespace Vulkan { + +// -------------------------------------------------------------------------- + +WEAK halide_device_interface_impl_t vulkan_device_interface_impl = { + halide_use_jit_module, + halide_release_jit_module, + halide_vulkan_device_malloc, + halide_vulkan_device_free, + halide_vulkan_device_sync, + halide_vulkan_device_release, + halide_vulkan_copy_to_host, + halide_vulkan_copy_to_device, + halide_vulkan_device_and_host_malloc, + halide_vulkan_device_and_host_free, + halide_vulkan_buffer_copy, + halide_vulkan_device_crop, + halide_vulkan_device_slice, + halide_vulkan_device_release_crop, + halide_vulkan_wrap_vk_buffer, + halide_vulkan_detach_vk_buffer, +}; + +WEAK halide_device_interface_t vulkan_device_interface = { + halide_device_malloc, + halide_device_free, + halide_device_sync, + halide_device_release, + halide_copy_to_host, + halide_copy_to_device, + halide_device_and_host_malloc, + halide_device_and_host_free, + halide_buffer_copy, + halide_device_crop, + halide_device_slice, + halide_device_release_crop, + halide_device_wrap_native, + halide_device_detach_native, + halide_vulkan_compute_capability, + &vulkan_device_interface_impl}; + +// -------------------------------------------------------------------------- + +} // namespace Vulkan +} // namespace Internal +} // namespace Runtime +} // namespace Halide diff --git a/src/runtime/vulkan_context.h b/src/runtime/vulkan_context.h new file mode 100644 index 000000000000..e064ea26643c --- /dev/null +++ b/src/runtime/vulkan_context.h @@ -0,0 +1,489 @@ +#ifndef HALIDE_RUNTIME_VULKAN_CONTEXT_H +#define HALIDE_RUNTIME_VULKAN_CONTEXT_H + +#include "printer.h" +#include 
"runtime_internal.h" +#include "scoped_spin_lock.h" + +#include "vulkan_extensions.h" +#include "vulkan_internal.h" +#include "vulkan_memory.h" + +// -------------------------------------------------------------------------- + +namespace Halide { +namespace Runtime { +namespace Internal { +namespace Vulkan { + +// -------------------------------------------------------------------------- + +// Vulkan Memory allocator for host-device allocations +halide_vulkan_memory_allocator *WEAK cached_allocator = nullptr; + +// Cached instance related handles for device resources +VkInstance WEAK cached_instance = nullptr; +VkDevice WEAK cached_device = nullptr; +VkCommandPool WEAK cached_command_pool = 0; +VkQueue WEAK cached_queue = nullptr; +VkPhysicalDevice WEAK cached_physical_device = nullptr; +uint32_t WEAK cached_queue_family_index = 0; + +// A Vulkan context/queue/synchronization lock defined in this module with weak linkage +volatile ScopedSpinLock::AtomicFlag WEAK thread_lock = 0; + +// -------------------------------------------------------------------------- + +// Helper object to acquire and release the Vulkan context. +class VulkanContext { + void *user_context; + +public: + VulkanMemoryAllocator *allocator; + VkInstance instance; + VkDevice device; + VkCommandPool command_pool; + VkPhysicalDevice physical_device; + VkQueue queue; + uint32_t queue_family_index; // used for operations requiring queue family + halide_error_code_t error; + + HALIDE_ALWAYS_INLINE VulkanContext(void *user_context) + : user_context(user_context), + allocator(nullptr), + instance(nullptr), + device(nullptr), + command_pool(0), + physical_device(nullptr), + queue(nullptr), + queue_family_index(0), + error(halide_error_code_success) { + + int result = halide_vulkan_acquire_context(user_context, + reinterpret_cast(&allocator), + &instance, &device, &physical_device, &command_pool, &queue, &queue_family_index); + if (result != halide_error_code_success) { + error = halide_error_code_device_interface_no_device; + halide_error_no_device_interface(user_context); + } + halide_debug_assert(user_context, allocator != nullptr); + halide_debug_assert(user_context, instance != nullptr); + halide_debug_assert(user_context, device != nullptr); + halide_debug_assert(user_context, command_pool != 0); + halide_debug_assert(user_context, queue != nullptr); + halide_debug_assert(user_context, physical_device != nullptr); + } + + HALIDE_ALWAYS_INLINE ~VulkanContext() { + halide_vulkan_release_context(user_context, instance, device, queue); + } + + // For now, this is always nullptr + HALIDE_ALWAYS_INLINE const VkAllocationCallbacks *allocation_callbacks() { + return nullptr; + } +}; + +// -------------------------------------------------------------------------- + +namespace { + +int vk_find_compute_capability(void *user_context, int *major, int *minor) { + debug(user_context) << " vk_find_compute_capability (user_context: " << user_context << ")\n"; + + VkInstance instance = nullptr; + VkDevice device = nullptr; + VkPhysicalDevice physical_device = nullptr; + uint32_t queue_family_index = 0; + + StringTable requested_layers; + vk_get_requested_layers(user_context, requested_layers); + + const VkAllocationCallbacks *alloc_callbacks = halide_vulkan_get_allocation_callbacks(user_context); + int status = vk_create_instance(user_context, requested_layers, &instance, alloc_callbacks); + if (status != halide_error_code_success) { + debug(user_context) << " no valid vulkan runtime was found ...\n"; + *major = 0; + *minor = 0; + return 0; 
+    }
+
+    if (vkCreateDevice == nullptr) {
+        vk_load_vulkan_functions(instance);
+    }
+
+    status = vk_select_device_for_context(user_context, &instance, &device, &physical_device, &queue_family_index);
+    if (status != halide_error_code_success) {
+        debug(user_context) << "  no valid vulkan device was found ...\n";
+        *major = 0;
+        *minor = 0;
+        return 0;
+    }
+
+    VkPhysicalDeviceProperties device_properties = {0};
+    debug(user_context) << "  querying for device properties ...\n";
+    vkGetPhysicalDeviceProperties(physical_device, &device_properties);
+    *major = VK_API_VERSION_MAJOR(device_properties.apiVersion);
+    *minor = VK_API_VERSION_MINOR(device_properties.apiVersion);
+    debug(user_context) << "  found device compute capability v" << *major << "." << *minor << " ...\n";
+
+    vk_destroy_instance(user_context, instance, alloc_callbacks);
+    return 0;
+}
+
+// Initializes the instance (used by the default vk_create_context)
+int vk_create_instance(void *user_context, const StringTable &requested_layers, VkInstance *instance, const VkAllocationCallbacks *alloc_callbacks) {
+    debug(user_context) << "    vk_create_instance (user_context: " << user_context << ")\n";
+
+    StringTable required_instance_extensions;
+    vk_get_required_instance_extensions(user_context, required_instance_extensions);
+
+    StringTable supported_instance_extensions;
+    vk_get_supported_instance_extensions(user_context, supported_instance_extensions);
+
+    bool valid_instance = vk_validate_required_extension_support(user_context, required_instance_extensions, supported_instance_extensions);
+    halide_abort_if_false(user_context, valid_instance);
+
+    debug(user_context) << "    found " << (uint32_t)required_instance_extensions.size() << " required extensions for instance!\n";
+    for (int n = 0; n < (int)required_instance_extensions.size(); ++n) {
+        debug(user_context) << "    extension: " << required_instance_extensions[n] << "\n";
+    }
+
+    // If we're running under MoltenVK, we must enable the portability extension and create flags
+    // to allow non-physical devices that are emulated to appear in the device list.
+ uint32_t create_flags = 0; + if (supported_instance_extensions.contains("VK_KHR_portability_enumeration") && + supported_instance_extensions.contains("VK_MVK_macos_surface")) { + create_flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR; + required_instance_extensions.append(user_context, "VK_KHR_portability_enumeration"); + } + + VkApplicationInfo app_info = { + VK_STRUCTURE_TYPE_APPLICATION_INFO, // struct type + nullptr, // Next + "Runtime", // application name + VK_MAKE_API_VERSION(0, 1, 0, 0), // app version + "Halide", // engine name + VK_MAKE_API_VERSION(0, HALIDE_VERSION_MAJOR, HALIDE_VERSION_MINOR, HALIDE_VERSION_PATCH), // engine version + VK_API_VERSION_1_3}; // FIXME: only use the minimum capability necessary + + VkInstanceCreateInfo create_info = { + VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + nullptr, // Next + create_flags, // Flags + &app_info, // ApplicationInfo + (uint32_t)requested_layers.size(), requested_layers.data(), // Layers + (uint32_t)required_instance_extensions.size(), required_instance_extensions.data() // Extensions + }; + + VkResult result = vkCreateInstance(&create_info, alloc_callbacks, instance); + if (result != VK_SUCCESS) { + debug(user_context) << "Vulkan: vkCreateInstance failed with return code: " << vk_get_error_name(result) << "\n"; + return halide_error_code_device_interface_no_device; + } + + return halide_error_code_success; +} + +int vk_destroy_instance(void *user_context, VkInstance instance, const VkAllocationCallbacks *alloc_callbacks) { + debug(user_context) << " vk_destroy_instance (user_context: " << user_context << ")\n"; + vkDestroyInstance(instance, alloc_callbacks); + return halide_error_code_success; +} + +int vk_select_device_for_context(void *user_context, + VkInstance *instance, VkDevice *device, + VkPhysicalDevice *physical_device, + uint32_t *queue_family_index) { + // query for the number of physical devices available in this instance + uint32_t device_count = 0; + VkResult result = vkEnumeratePhysicalDevices(*instance, &device_count, nullptr); + if ((result != VK_SUCCESS) && (result != VK_INCOMPLETE)) { + debug(user_context) << "Vulkan: vkEnumeratePhysicalDevices failed with return code: " << vk_get_error_name(result) << "\n"; + return halide_error_code_device_interface_no_device; + } + if (device_count == 0) { + debug(user_context) << "Vulkan: No devices found.\n"; + return halide_error_code_device_interface_no_device; + } + + // allocate enough storage for the physical device query results + BlockStorage::Config device_query_storage_config; + device_query_storage_config.entry_size = sizeof(VkPhysicalDevice); + BlockStorage device_query_storage(user_context, device_query_storage_config); + device_query_storage.resize(user_context, device_count); + + VkPhysicalDevice chosen_device = nullptr; + VkPhysicalDevice *avail_devices = (VkPhysicalDevice *)(device_query_storage.data()); + if (avail_devices == nullptr) { + debug(user_context) << "Vulkan: Out of system memory!\n"; + return halide_error_code_out_of_memory; + } + result = vkEnumeratePhysicalDevices(*instance, &device_count, avail_devices); + if ((result != VK_SUCCESS) && (result != VK_INCOMPLETE)) { + debug(user_context) << "Vulkan: vkEnumeratePhysicalDevices failed with return code: " << vk_get_error_name(result) << "\n"; + return halide_error_code_device_interface_no_device; + } + + // get the configurable device type to search for (e.g. 'cpu', 'gpu', 'integrated-gpu', 'discrete-gpu', ...) 
+    const char *dev_type = halide_vulkan_get_device_type(user_context);
+
+    // try to find a matching device that supports compute
+    uint32_t queue_family = 0;
+    for (uint32_t i = 0; (chosen_device == nullptr) && (i < device_count); i++) {
+        VkPhysicalDeviceProperties properties;
+        vkGetPhysicalDeviceProperties(avail_devices[i], &properties);
+        debug(user_context) << "Vulkan: Checking device #" << i << "='" << properties.deviceName << "'\n";
+
+        int matching_device = 0;
+        if ((dev_type != nullptr) && (*dev_type != '\0')) {
+            if (strstr(dev_type, "cpu") && (properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU)) {
+                matching_device = 1;
+            } else if (strstr(dev_type, "integrated-gpu") && (properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU)) {
+                matching_device = 1;
+            } else if (strstr(dev_type, "discrete-gpu") && (properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)) {
+                matching_device = 1;
+            } else if (strstr(dev_type, "virtual-gpu") && (properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU)) {
+                matching_device = 1;
+            } else if (strstr(dev_type, "gpu") && ((properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) || (properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU))) {
+                matching_device = 1;
+            }
+        } else {
+            // use a non-virtual gpu device by default
+            if ((properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) ||
+                (properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU)) {
+                matching_device = 1;
+            }
+        }
+
+        if (matching_device) {
+            // get the number of supported queues for this physical device
+            uint32_t queue_properties_count = 0;
+            vkGetPhysicalDeviceQueueFamilyProperties(avail_devices[i], &queue_properties_count, nullptr);
+            if (queue_properties_count < 1) {
+                continue;
+            }
+
+            // allocate enough storage for the queue properties query results
+            BlockStorage::Config queue_properties_storage_config;
+            queue_properties_storage_config.entry_size = sizeof(VkQueueFamilyProperties);
+            BlockStorage queue_properties_storage(user_context, queue_properties_storage_config);
+            queue_properties_storage.resize(user_context, queue_properties_count);
+
+            VkQueueFamilyProperties *queue_properties = (VkQueueFamilyProperties *)(queue_properties_storage.data());
+            vkGetPhysicalDeviceQueueFamilyProperties(avail_devices[i], &queue_properties_count, queue_properties);
+            for (uint32_t j = 0; (chosen_device == nullptr) && (j < queue_properties_count); j++) {
+                if (queue_properties[j].queueCount > 0 &&
+                    queue_properties[j].queueFlags & VK_QUEUE_COMPUTE_BIT) {
+                    chosen_device = avail_devices[i];
+                    queue_family = j;
+
+                    debug(user_context) << "Vulkan: Found matching compute device '" << properties.deviceName << "'\n";
+                }
+            }
+        }
+    }
+
+    // If no suitable device was found, fall back to the first one enumerated.
+ if (chosen_device == nullptr) { + queue_family = 0; + chosen_device = avail_devices[0]; + VkPhysicalDeviceProperties properties; + vkGetPhysicalDeviceProperties(chosen_device, &properties); + debug(user_context) << "Vulkan: Defaulting to first compute device '" << properties.deviceName << "'\n"; + } + + *queue_family_index = queue_family; + *physical_device = chosen_device; + return halide_error_code_success; +} + +int vk_create_device(void *user_context, const StringTable &requested_layers, VkInstance *instance, VkDevice *device, VkQueue *queue, + VkPhysicalDevice *physical_device, uint32_t *queue_family_index, const VkAllocationCallbacks *alloc_callbacks) { + debug(user_context) << " vk_create_device (user_context=" << user_context << ")\n"; + + debug(user_context) << " checking for required device extensions ...\n"; + StringTable required_device_extensions; + vk_get_required_device_extensions(user_context, required_device_extensions); + + debug(user_context) << " checking for optional device extensions ...\n"; + StringTable optional_device_extensions; + vk_get_optional_device_extensions(user_context, optional_device_extensions); + + debug(user_context) << " validating supported device extensions ...\n"; + StringTable supported_device_extensions; + vk_get_supported_device_extensions(user_context, *physical_device, supported_device_extensions); + + bool valid_device = vk_validate_required_extension_support(user_context, required_device_extensions, supported_device_extensions); + if (!valid_device) { + debug(user_context) << "Vulkan: Unable to validate required extension support!\n"; + return halide_error_code_device_interface_no_device; + } + + debug(user_context) << " found " << (uint32_t)required_device_extensions.size() << " required extensions for device!\n"; + for (int n = 0; n < (int)required_device_extensions.size(); ++n) { + debug(user_context) << " required extension: " << required_device_extensions[n] << "\n"; + } + + // enable all available optional extensions + debug(user_context) << " checking for " << (uint32_t)optional_device_extensions.size() << " optional extensions for device ...\n"; + for (int n = 0; n < (int)optional_device_extensions.size(); ++n) { + if (supported_device_extensions.contains(optional_device_extensions[n])) { + debug(user_context) << " optional extension: " << optional_device_extensions[n] << "\n"; + required_device_extensions.append(user_context, optional_device_extensions[n]); + } + } + + float queue_priority = 1.0f; + VkDeviceQueueCreateInfo device_queue_create_info = { + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + nullptr, // Next + 0, // Flags + *queue_family_index, + 1, + &queue_priority, + }; + + // Get the API version to determine what device features are valid to search for + VkPhysicalDeviceProperties device_properties = {0}; + debug(user_context) << " querying for device properties ...\n"; + vkGetPhysicalDeviceProperties(*physical_device, &device_properties); + uint32_t major_version = VK_API_VERSION_MAJOR(device_properties.apiVersion); + uint32_t minor_version = VK_API_VERSION_MINOR(device_properties.apiVersion); + bool has_capability_v11 = (major_version >= 1) && (minor_version >= 1); // supports >= v1.1 + bool has_capability_v12 = (major_version >= 1) && (minor_version >= 2); // supports >= v1.2 + debug(user_context) << " found device compute capability v" << major_version << "." 
<< minor_version << " ...\n"; + + // Get the device features so that all supported features are enabled when device is created + VkPhysicalDeviceFeatures device_features = {}; + void *extended_features_ptr = nullptr; + void *standard_features_ptr = nullptr; + + debug(user_context) << " querying for device features...\n"; + vkGetPhysicalDeviceFeatures(*physical_device, &device_features); + debug(user_context) << " shader float64 support: " << (device_features.shaderFloat64 ? "true" : "false") << "...\n"; + debug(user_context) << " shader int64 support: " << (device_features.shaderInt64 ? "true" : "false") << "...\n"; + debug(user_context) << " shader int16 support: " << (device_features.shaderInt16 ? "true" : "false") << "...\n"; + + // assemble the chain of features to query, but only add the ones that exist in the API version + + // note: requires v1.2+ + VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_f16_i8_ext = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR, + nullptr, VK_FALSE, VK_FALSE}; + + // note: requires v1.2+ + VkPhysicalDevice8BitStorageFeaturesKHR storage_8bit_ext = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR, + &shader_f16_i8_ext, VK_FALSE, VK_FALSE, VK_FALSE}; + + // note: requires v1.1+ + VkPhysicalDevice16BitStorageFeaturesKHR storage_16bit_ext = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, + (has_capability_v12 ? &storage_8bit_ext : nullptr), + VK_FALSE, VK_FALSE, VK_FALSE, VK_FALSE}; + + VkPhysicalDeviceFeatures2KHR device_features_ext = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR, + &storage_16bit_ext, device_features}; + + // Look for extended device feature query method (KHR was removed when it was adopted into v1.1+) + PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR = (PFN_vkGetPhysicalDeviceFeatures2KHR)vkGetInstanceProcAddr(*instance, "vkGetPhysicalDeviceFeatures2KHR"); // v1.0+ + if (!vkGetPhysicalDeviceFeatures2KHR) { + vkGetPhysicalDeviceFeatures2KHR = (PFN_vkGetPhysicalDeviceFeatures2KHR)vkGetInstanceProcAddr(*instance, "vkGetPhysicalDeviceFeatures2"); + } + + // If the instance runtime supports querying extended device features, request them + if (vkGetPhysicalDeviceFeatures2KHR && has_capability_v11) { + + debug(user_context) << " querying for extended device features...\n"; + vkGetPhysicalDeviceFeatures2KHR(*physical_device, &device_features_ext); + debug(user_context) << " shader int8 support: " << (shader_f16_i8_ext.shaderInt8 ? "true" : "false") << "...\n"; + debug(user_context) << " shader float16 support: " << (shader_f16_i8_ext.shaderFloat16 ? "true" : "false") << "...\n"; + if (has_capability_v12) { + debug(user_context) << " storage buffer 8bit access support: " << (storage_8bit_ext.storageBuffer8BitAccess ? "true" : "false") << "...\n"; + debug(user_context) << " storage buffer 16bit access support: " << (storage_16bit_ext.storageBuffer16BitAccess ? 
"true" : "false") << "...\n"; + } + extended_features_ptr = (void *)(&device_features_ext); // pass extended features (which also contains the standard features) + } else { + standard_features_ptr = &device_features; // pass v1.0 standard features + } + + VkDeviceCreateInfo device_create_info = { + VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + extended_features_ptr, // Extended struct ptr (used here for requesting chain of extended features) + 0, // Flags + 1, // Count of queues to create + &device_queue_create_info, + (uint32_t)requested_layers.size(), requested_layers.data(), // Layers + (uint32_t)required_device_extensions.size(), required_device_extensions.data(), // Enabled extensions + (VkPhysicalDeviceFeatures *)standard_features_ptr, // Requested device features + }; + + VkResult result = vkCreateDevice(*physical_device, &device_create_info, alloc_callbacks, device); + if (result != VK_SUCCESS) { + debug(user_context) << "Vulkan: vkCreateDevice failed with return code: " << vk_get_error_name(result) << "\n"; + return halide_error_code_device_interface_no_device; + } + + vkGetDeviceQueue(cached_device, *queue_family_index, 0, queue); + return halide_error_code_success; +} + +// Initializes the context (used by the default implementation of halide_acquire_context) +int vk_create_context(void *user_context, VulkanMemoryAllocator **allocator, + VkInstance *instance, VkDevice *device, VkPhysicalDevice *physical_device, + VkCommandPool *command_pool, VkQueue *queue, uint32_t *queue_family_index) { + + debug(user_context) << " vk_create_context (user_context: " << user_context << ")\n"; + + StringTable requested_layers; + uint32_t requested_layer_count = vk_get_requested_layers(user_context, requested_layers); + debug(user_context) << " requested " << requested_layer_count << " layers for instance!\n"; + for (int n = 0; n < (int)requested_layer_count; ++n) { + debug(user_context) << " layer: " << requested_layers[n] << "\n"; + } + + const VkAllocationCallbacks *alloc_callbacks = halide_vulkan_get_allocation_callbacks(user_context); + int error_code = vk_create_instance(user_context, requested_layers, instance, alloc_callbacks); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to create instance for context!\n"; + return error_code; + } + + if (vkCreateDevice == nullptr) { + vk_load_vulkan_functions(*instance); + } + + error_code = vk_select_device_for_context(user_context, instance, device, physical_device, queue_family_index); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to select device for context!\n"; + return error_code; + } + + error_code = vk_create_device(user_context, requested_layers, instance, device, queue, physical_device, queue_family_index, alloc_callbacks); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to create device for context!\n"; + return error_code; + } + + *allocator = vk_create_memory_allocator(user_context, *device, *physical_device, alloc_callbacks); + if (*allocator == nullptr) { + error(user_context) << "Vulkan: Failed to create memory allocator for device!\n"; + return halide_error_code_generic_error; + } + + error_code = vk_create_command_pool(user_context, *allocator, *queue_family_index, command_pool); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to create command pool for context!\n"; + return error_code; + } + + return halide_error_code_success; +} + +// 
-------------------------------------------------------------------------- + +} // namespace +} // namespace Vulkan +} // namespace Internal +} // namespace Runtime +} // namespace Halide + +#endif /// HALIDE_RUNTIME_VULKAN_CONTEXT_H diff --git a/src/runtime/vulkan_extensions.h b/src/runtime/vulkan_extensions.h new file mode 100644 index 000000000000..1eaea7ced487 --- /dev/null +++ b/src/runtime/vulkan_extensions.h @@ -0,0 +1,331 @@ +#ifndef HALIDE_RUNTIME_VULKAN_EXTENSIONS_H +#define HALIDE_RUNTIME_VULKAN_EXTENSIONS_H + +#include "vulkan_internal.h" + +// -------------------------------------------------------------------------- + +namespace Halide { +namespace Runtime { +namespace Internal { +namespace Vulkan { + +// -------------------------------------------------------------------------- + +WEAK char layer_names[1024]; +WEAK ScopedSpinLock::AtomicFlag layer_names_lock = 0; +WEAK bool layer_names_initialized = false; + +WEAK char extension_names[1024]; +WEAK ScopedSpinLock::AtomicFlag extension_names_lock = 0; +WEAK bool extension_names_initialized = false; + +WEAK char device_type[256]; +WEAK ScopedSpinLock::AtomicFlag device_type_lock = 0; +WEAK bool device_type_initialized = false; + +WEAK char build_options[1024]; +WEAK ScopedSpinLock::AtomicFlag build_options_lock = 0; +WEAK bool build_options_initialized = false; + +WEAK char alloc_config[1024]; +WEAK ScopedSpinLock::AtomicFlag alloc_config_lock = 0; +WEAK bool alloc_config_initialized = false; + +// -------------------------------------------------------------------------- +namespace { + +void vk_set_layer_names_internal(const char *n) { + if (n) { + size_t buffer_size = sizeof(layer_names) / sizeof(layer_names[0]); + StringUtils::copy_up_to(layer_names, n, buffer_size); + } else { + layer_names[0] = 0; + } + layer_names_initialized = true; +} + +const char *vk_get_layer_names_internal(void *user_context) { + if (!layer_names_initialized) { + const char *value = getenv("HL_VK_LAYERS"); + if (value == nullptr) { + value = getenv("VK_INSTANCE_LAYERS"); + } + vk_set_layer_names_internal(value); + } + return layer_names; +} + +void vk_set_extension_names_internal(const char *n) { + if (n) { + size_t buffer_size = sizeof(extension_names) / sizeof(extension_names[0]); + StringUtils::copy_up_to(extension_names, n, buffer_size); + } else { + extension_names[0] = 0; + } + extension_names_initialized = true; +} + +const char *vk_get_extension_names_internal(void *user_context) { + if (!extension_names_initialized) { + const char *name = getenv("HL_VK_EXTENSIONS"); + vk_set_extension_names_internal(name); + } + return extension_names; +} + +void vk_set_device_type_internal(const char *n) { + if (n) { + size_t buffer_size = sizeof(device_type) / sizeof(device_type[0]); + StringUtils::copy_up_to(device_type, n, buffer_size); + } else { + device_type[0] = 0; + } + device_type_initialized = true; +} + +const char *vk_get_device_type_internal(void *user_context) { + if (!device_type_initialized) { + const char *name = getenv("HL_VK_DEVICE_TYPE"); + vk_set_device_type_internal(name); + } + return device_type; +} + +void vk_set_build_options_internal(const char *n) { + if (n) { + size_t buffer_size = sizeof(build_options) / sizeof(build_options[0]); + StringUtils::copy_up_to(build_options, n, buffer_size); + } else { + build_options[0] = 0; + } + build_options_initialized = true; +} + +const char *vk_get_build_options_internal(void *user_context) { + if (!build_options_initialized) { + const char *name = getenv("HL_VK_BUILD_OPTIONS"); + 
vk_set_build_options_internal(name);
+    }
+    return build_options;
+}
+
+void vk_set_alloc_config_internal(const char *n) {
+    if (n) {
+        size_t buffer_size = sizeof(alloc_config) / sizeof(alloc_config[0]);
+        StringUtils::copy_up_to(alloc_config, n, buffer_size);
+    } else {
+        alloc_config[0] = 0;
+    }
+    alloc_config_initialized = true;
+}
+
+const char *vk_get_alloc_config_internal(void *user_context) {
+    if (!alloc_config_initialized) {
+        const char *name = getenv("HL_VK_ALLOC_CONFIG");
+        vk_set_alloc_config_internal(name);
+    }
+    return alloc_config;
+}
+
+// --------------------------------------------------------------------------
+
+uint32_t vk_get_requested_layers(void *user_context, StringTable &layer_table) {
+    ScopedSpinLock lock(&layer_names_lock);
+    const char *layer_names = vk_get_layer_names_internal(user_context);
+    return layer_table.parse(user_context, layer_names, HL_VK_ENV_DELIM);
+}
+
+uint32_t vk_get_required_instance_extensions(void *user_context, StringTable &ext_table) {
+    const char *required_ext_table[] = {"VK_KHR_get_physical_device_properties2"};
+    const uint32_t required_ext_count = sizeof(required_ext_table) / sizeof(required_ext_table[0]);
+    ext_table.fill(user_context, (const char **)required_ext_table, required_ext_count);
+    return required_ext_count;
+}
+
+uint32_t vk_get_supported_instance_extensions(void *user_context, StringTable &ext_table) {
+
+    PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties = (PFN_vkEnumerateInstanceExtensionProperties)
+        vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceExtensionProperties");
+
+    if (vkEnumerateInstanceExtensionProperties == nullptr) {
+        debug(user_context) << "Vulkan: Missing vkEnumerateInstanceExtensionProperties proc address! Invalid loader?!\n";
+        return 0;
+    }
+
+    uint32_t avail_ext_count = 0;
+    vkEnumerateInstanceExtensionProperties(nullptr, &avail_ext_count, nullptr);
+    debug(user_context) << "Vulkan: vkEnumerateInstanceExtensionProperties found " << avail_ext_count << " extensions ...\n";
+
+    if (avail_ext_count) {
+        BlockStorage::Config config;
+        config.entry_size = sizeof(VkExtensionProperties);
+        config.minimum_capacity = avail_ext_count;
+
+        BlockStorage extension_properties(user_context, config);
+        extension_properties.resize(user_context, avail_ext_count);
+
+        vkEnumerateInstanceExtensionProperties(nullptr,
+                                               &avail_ext_count, static_cast<VkExtensionProperties *>(extension_properties.data()));
+
+        for (uint32_t n = 0; n < avail_ext_count; ++n) {
+            const VkExtensionProperties *properties = static_cast<const VkExtensionProperties *>(extension_properties[n]);
+            debug(user_context) << "    extension: " << properties->extensionName << "\n";
+        }
+
+        ext_table.resize(user_context, avail_ext_count);
+        for (uint32_t n = 0; n < avail_ext_count; ++n) {
+            const VkExtensionProperties *properties = static_cast<const VkExtensionProperties *>(extension_properties[n]);
+            ext_table.assign(user_context, n, properties->extensionName);
+        }
+    }
+
+    return avail_ext_count;
+}
+
+uint32_t vk_get_required_device_extensions(void *user_context, StringTable &ext_table) {
+    const char *required_ext_table[] = {"VK_KHR_8bit_storage", "VK_KHR_storage_buffer_storage_class"};
+    const uint32_t required_ext_count = sizeof(required_ext_table) / sizeof(required_ext_table[0]);
+    ext_table.fill(user_context, (const char **)required_ext_table, required_ext_count);
+    return required_ext_count;
+}
+
+uint32_t vk_get_optional_device_extensions(void *user_context, StringTable &ext_table) {
+    const char *optional_ext_table[] = {
+        "VK_KHR_portability_subset",  //< necessary for running under MoltenVK (aka
Vulkan on Mac)
+        "VK_KHR_16bit_storage",
+        "VK_KHR_shader_float16_int8",
+        "VK_KHR_shader_float_controls"};
+    const uint32_t optional_ext_count = sizeof(optional_ext_table) / sizeof(optional_ext_table[0]);
+
+    ext_table.resize(user_context, optional_ext_count);
+    for (uint32_t n = 0; n < optional_ext_count; ++n) {
+        ext_table.assign(user_context, n, optional_ext_table[n]);
+    }
+    return optional_ext_count;
+}
+
+uint32_t vk_get_supported_device_extensions(void *user_context, VkPhysicalDevice physical_device, StringTable &ext_table) {
+    debug(user_context) << "vk_get_supported_device_extensions\n";
+    if (vkEnumerateDeviceExtensionProperties == nullptr) {
+        debug(user_context) << "Vulkan: Missing vkEnumerateDeviceExtensionProperties proc address! Invalid loader?!\n";
+        return 0;
+    }
+
+    uint32_t avail_ext_count = 0;
+    vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &avail_ext_count, nullptr);
+    debug(user_context) << "Vulkan: vkEnumerateDeviceExtensionProperties found " << avail_ext_count << " extensions ...\n";
+
+    if (avail_ext_count > 0) {
+        BlockStorage::Config config;
+        config.entry_size = sizeof(VkExtensionProperties);
+        config.minimum_capacity = avail_ext_count;
+
+        BlockStorage extension_properties(user_context, config);
+        extension_properties.resize(user_context, avail_ext_count);
+
+        vkEnumerateDeviceExtensionProperties(physical_device, nullptr,
+                                             &avail_ext_count, static_cast<VkExtensionProperties *>(extension_properties.data()));
+
+        for (uint32_t n = 0; n < avail_ext_count; ++n) {
+            const VkExtensionProperties *properties = static_cast<const VkExtensionProperties *>(extension_properties[n]);
+            debug(user_context) << "    extension: " << properties->extensionName << "\n";
+        }
+
+        ext_table.resize(user_context, avail_ext_count);
+        for (uint32_t n = 0; n < avail_ext_count; ++n) {
+            const VkExtensionProperties *properties = static_cast<const VkExtensionProperties *>(extension_properties[n]);
+            ext_table.assign(user_context, n, properties->extensionName);
+        }
+    }
+
+    return avail_ext_count;
+}
+
+bool vk_validate_required_extension_support(void *user_context,
+                                            const StringTable &required_extensions,
+                                            const StringTable &supported_extensions) {
+    bool validated = true;
+    for (uint32_t n = 0; n < required_extensions.size(); ++n) {
+        const char *extension = required_extensions[n];
+        if (!supported_extensions.contains(extension)) {
+            debug(user_context) << "Vulkan: Missing required extension: '" << extension << "'!
\n"; + validated = false; + } + } + return validated; +} + +// -------------------------------------------------------------------------- + +} // namespace +} // namespace Vulkan +} // namespace Internal +} // namespace Runtime +} // namespace Halide + +// -------------------------------------------------------------------------- + +using namespace Halide::Runtime::Internal::Vulkan; + +// -------------------------------------------------------------------------- + +extern "C" { + +// -------------------------------------------------------------------------- + +WEAK void halide_vulkan_set_layer_names(const char *n) { + ScopedSpinLock lock(&layer_names_lock); + vk_set_layer_names_internal(n); +} + +WEAK const char *halide_vulkan_get_layer_names(void *user_context) { + ScopedSpinLock lock(&layer_names_lock); + return vk_get_layer_names_internal(user_context); +} + +WEAK void halide_vulkan_set_extension_names(const char *n) { + ScopedSpinLock lock(&extension_names_lock); + vk_set_extension_names_internal(n); +} + +WEAK const char *halide_vulkan_get_extension_names(void *user_context) { + ScopedSpinLock lock(&extension_names_lock); + return vk_get_extension_names_internal(user_context); +} + +WEAK void halide_vulkan_set_device_type(const char *n) { + ScopedSpinLock lock(&device_type_lock); + vk_set_device_type_internal(n); +} + +WEAK const char *halide_vulkan_get_device_type(void *user_context) { + ScopedSpinLock lock(&device_type_lock); + return vk_get_device_type_internal(user_context); +} + +WEAK void halide_vulkan_set_build_options(const char *n) { + ScopedSpinLock lock(&build_options_lock); + vk_set_build_options_internal(n); +} + +WEAK const char *halide_vulkan_get_build_options(void *user_context) { + ScopedSpinLock lock(&build_options_lock); + return vk_get_build_options_internal(user_context); +} + +WEAK void halide_vulkan_set_alloc_config(const char *n) { + ScopedSpinLock lock(&alloc_config_lock); + vk_set_alloc_config_internal(n); +} + +WEAK const char *halide_vulkan_get_alloc_config(void *user_context) { + ScopedSpinLock lock(&alloc_config_lock); + return vk_get_alloc_config_internal(user_context); +} + +// -------------------------------------------------------------------------- + +} // extern "C" + +#endif // HALIDE_RUNTIME_VULKAN_EXTENSIONS_H \ No newline at end of file diff --git a/src/runtime/vulkan_functions.h b/src/runtime/vulkan_functions.h new file mode 100644 index 000000000000..d1c0a8bfd32c --- /dev/null +++ b/src/runtime/vulkan_functions.h @@ -0,0 +1,45 @@ +// NOTE: vkCreateInstance is already defined in the mini_vulkan header +VULKAN_FN(vkDestroyInstance) +VULKAN_FN(vkCreateDevice) +VULKAN_FN(vkDestroyDevice) +VULKAN_FN(vkGetDeviceQueue) +VULKAN_FN(vkCreateBuffer) +VULKAN_FN(vkDestroyBuffer) +VULKAN_FN(vkDestroyCommandPool) +VULKAN_FN(vkDestroyDescriptorPool) +VULKAN_FN(vkDestroyDescriptorSetLayout) +VULKAN_FN(vkDestroyPipeline) +VULKAN_FN(vkDestroyPipelineLayout) +VULKAN_FN(vkDestroyShaderModule) +VULKAN_FN(vkEnumerateDeviceExtensionProperties) +VULKAN_FN(vkEnumeratePhysicalDevices) +VULKAN_FN(vkGetPhysicalDeviceFeatures) +VULKAN_FN(vkGetPhysicalDeviceProperties) +VULKAN_FN(vkGetPhysicalDeviceQueueFamilyProperties) +VULKAN_FN(vkGetBufferMemoryRequirements) +VULKAN_FN(vkCreateShaderModule) +VULKAN_FN(vkCreateDescriptorSetLayout) +VULKAN_FN(vkCreatePipelineLayout) +VULKAN_FN(vkCreateComputePipelines) +VULKAN_FN(vkCreateDescriptorPool) +VULKAN_FN(vkAllocateDescriptorSets) +VULKAN_FN(vkGetPhysicalDeviceMemoryProperties) +VULKAN_FN(vkAllocateMemory) 
+VULKAN_FN(vkBindBufferMemory)
+VULKAN_FN(vkMapMemory)
+VULKAN_FN(vkUnmapMemory)
+VULKAN_FN(vkFreeMemory)
+VULKAN_FN(vkUpdateDescriptorSets)
+VULKAN_FN(vkCreateCommandPool)
+VULKAN_FN(vkResetCommandPool)
+VULKAN_FN(vkAllocateCommandBuffers)
+VULKAN_FN(vkFreeCommandBuffers)
+VULKAN_FN(vkBeginCommandBuffer)
+VULKAN_FN(vkCmdBindPipeline)
+VULKAN_FN(vkCmdBindDescriptorSets)
+VULKAN_FN(vkCmdDispatch)
+VULKAN_FN(vkQueueSubmit)
+VULKAN_FN(vkQueueWaitIdle)
+VULKAN_FN(vkEndCommandBuffer)
+VULKAN_FN(vkCmdFillBuffer)
+VULKAN_FN(vkCmdCopyBuffer)
\ No newline at end of file
diff --git a/src/runtime/vulkan_interface.h b/src/runtime/vulkan_interface.h
new file mode 100644
index 000000000000..676c8548f6fc
--- /dev/null
+++ b/src/runtime/vulkan_interface.h
@@ -0,0 +1,66 @@
+#ifndef HALIDE_RUNTIME_VULKAN_INTERFACE_H
+#define HALIDE_RUNTIME_VULKAN_INTERFACE_H
+
+#include "runtime_internal.h"
+
+// --------------------------------------------------------------------------
+// Vulkan Specific Definitions
+// --------------------------------------------------------------------------
+
+// Vulkan API version identifier macro
+#define VK_MAKE_API_VERSION(variant, major, minor, patch) \
+    ((((uint32_t)(variant)) << 29) | (((uint32_t)(major)) << 22) | (((uint32_t)(minor)) << 12) | ((uint32_t)(patch)))
+
+// Vulkan API version 1.0.0
+#define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)  // Patch version should always be set to 0
+
+// Environment variable string delimiter
+#ifdef WINDOWS
+#define HL_VK_ENV_DELIM ";"
+#else
+#define HL_VK_ENV_DELIM ":"
+#endif
+
+// Prototypes for the subset of the Vulkan API we need
+#define VK_NO_PROTOTYPES
+// NOLINTNEXTLINE
+#include "mini_vulkan.h"
+
+// --------------------------------------------------------------------------
+// Vulkan API Definition
+// --------------------------------------------------------------------------
+
+namespace Halide {
+namespace Runtime {
+namespace Internal {
+namespace Vulkan {
+
+// --------------------------------------------------------------------------
+
+// Halide device interface struct for the runtime-specific function table
+extern WEAK halide_device_interface_t vulkan_device_interface;
+
+// --------------------------------------------------------------------------
+
+// clang-format off
+#define VULKAN_FN(fn) WEAK PFN_##fn fn;
+#include "vulkan_functions.h"
+#undef VULKAN_FN
+// clang-format on
+
+void WEAK vk_load_vulkan_functions(VkInstance instance) {
+#define VULKAN_FN(fn) fn = (PFN_##fn)vkGetInstanceProcAddr(instance, #fn);
+#include "vulkan_functions.h"
+#undef VULKAN_FN
+}
+
+// --
+
+// --------------------------------------------------------------------------
+
+}  // namespace Vulkan
+}  // namespace Internal
+}  // namespace Runtime
+}  // namespace Halide
+
+#endif  // HALIDE_RUNTIME_VULKAN_INTERFACE_H
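The function list above is consumed twice as an X-macro table: vulkan_interface.h includes it once with VULKAN_FN defined to declare a WEAK function pointer per entry, then vk_load_vulkan_functions includes it again to resolve each pointer through vkGetInstanceProcAddr. A standalone illustration of the same trick follows; MY_FN_LIST, load_functions, and get_proc are made-up names for this sketch, and the real runtime keeps its table in vulkan_functions.h:

    #define VK_NO_PROTOTYPES
    #include <vulkan/vulkan.h>

    // A made-up two-entry function list. Each expansion of the list
    // re-defines FN() for a different purpose.
    #define MY_FN_LIST \
        FN(vkDestroyInstance) \
        FN(vkGetDeviceQueue)

    // Expansion 1: define one function-pointer variable per entry.
    #define FN(fn) static PFN_##fn fn = nullptr;
    MY_FN_LIST
    #undef FN

    // Expansion 2: resolve each pointer through the loader at runtime.
    // get_proc stands in for vkGetInstanceProcAddr obtained elsewhere.
    static void load_functions(VkInstance instance, PFN_vkGetInstanceProcAddr get_proc) {
    #define FN(fn) fn = (PFN_##fn)get_proc(instance, #fn);
        MY_FN_LIST
    #undef FN
    }

Keeping the list in a header that is re-included under different macro definitions means new entry points only need to be added in one place.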
diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h
new file mode 100644
index 000000000000..05eb03361d15
--- /dev/null
+++ b/src/runtime/vulkan_internal.h
@@ -0,0 +1,296 @@
+#ifndef HALIDE_RUNTIME_VULKAN_INTERNAL_H
+#define HALIDE_RUNTIME_VULKAN_INTERNAL_H
+
+#include "gpu_context_common.h"
+#include "printer.h"
+#include "runtime_internal.h"
+#include "scoped_spin_lock.h"
+
+#include "internal/block_storage.h"
+#include "internal/linked_list.h"
+#include "internal/memory_arena.h"
+#include "internal/string_storage.h"
+#include "internal/string_table.h"
+
+#include "vulkan_interface.h"
+
+// --
+
+namespace Halide {
+namespace Runtime {
+namespace Internal {
+namespace Vulkan {
+
+// Declarations
+class VulkanMemoryAllocator;
+struct VulkanShaderBinding;
+struct VulkanDispatchData;
+struct VulkanCompilationCacheEntry;
+
+// --------------------------------------------------------------------------
+
+namespace {  // internalize
+
+// --------------------------------------------------------------------------
+// Memory
+// --------------------------------------------------------------------------
+void *vk_host_malloc(void *user_context, size_t size, size_t alignment, VkSystemAllocationScope scope, const VkAllocationCallbacks *callbacks = nullptr);
+void vk_host_free(void *user_context, void *ptr, const VkAllocationCallbacks *callbacks = nullptr);
+int vk_device_crop_from_offset(void *user_context, const struct halide_buffer_t *src, int64_t offset, struct halide_buffer_t *dst);
+VulkanMemoryAllocator *vk_create_memory_allocator(void *user_context, VkDevice device, VkPhysicalDevice physical_device,
+                                                  const VkAllocationCallbacks *alloc_callbacks);
+
+int vk_destroy_memory_allocator(void *user_context, VulkanMemoryAllocator *allocator);
+int vk_clear_device_buffer(void *user_context,
+                           VulkanMemoryAllocator *allocator,
+                           VkCommandPool command_pool,
+                           VkQueue command_queue,
+                           VkBuffer device_buffer);
+
+// --------------------------------------------------------------------------
+// Context
+// --------------------------------------------------------------------------
+
+int vk_create_context(
+    void *user_context,
+    VulkanMemoryAllocator **allocator,
+    VkInstance *instance,
+    VkDevice *device,
+    VkPhysicalDevice *physical_device,
+    VkCommandPool *command_pool,
+    VkQueue *queue, uint32_t *queue_family_index);
+
+int vk_find_compute_capability(void *user_context, int *major, int *minor);
+
+int vk_create_instance(void *user_context, const StringTable &requested_layers, VkInstance *instance, const VkAllocationCallbacks *alloc_callbacks);
+int vk_destroy_instance(void *user_context, VkInstance instance, const VkAllocationCallbacks *alloc_callbacks);
+
+int vk_select_device_for_context(void *user_context,
+                                 VkInstance *instance, VkDevice *device,
+                                 VkPhysicalDevice *physical_device,
+                                 uint32_t *queue_family_index);
+
+int vk_create_device(void *user_context, const StringTable &requested_layers, VkInstance *instance, VkDevice *device, VkQueue *queue,
+                     VkPhysicalDevice *physical_device, uint32_t *queue_family_index, const VkAllocationCallbacks *alloc_callbacks);
+
+// --------------------------------------------------------------------------
+// Extensions
+// --------------------------------------------------------------------------
+uint32_t vk_get_requested_layers(void *user_context, StringTable &layer_table);
+uint32_t vk_get_required_instance_extensions(void *user_context, StringTable &ext_table);
+uint32_t vk_get_supported_instance_extensions(void *user_context, StringTable &ext_table);
+uint32_t vk_get_required_device_extensions(void *user_context, StringTable &ext_table);
+uint32_t vk_get_optional_device_extensions(void *user_context, StringTable &ext_table);
+uint32_t vk_get_supported_device_extensions(void *user_context, VkPhysicalDevice physical_device, StringTable &ext_table);
+bool vk_validate_required_extension_support(void *user_context,
+                                            const StringTable &required_extensions,
+                                            const StringTable &supported_extensions);
+
+// --------------------------------------------------------------------------
+// Resources
+// --------------------------------------------------------------------------
+
+// -- Command Pool
+int vk_create_command_pool(void *user_context, VulkanMemoryAllocator *allocator, uint32_t
queue_index, VkCommandPool *command_pool); +int vk_destroy_command_pool(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool); + +// -- Command Buffer +int vk_create_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool pool, VkCommandBuffer *command_buffer); +int vk_destroy_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool, VkCommandBuffer command_buffer); + +int vk_fill_command_buffer_with_dispatch_call(void *user_context, + VkDevice device, + VkCommandBuffer command_buffer, + VkPipeline compute_pipeline, + VkPipelineLayout pipeline_layout, + VkDescriptorSet descriptor_set, + uint32_t descriptor_set_index, + int blocksX, int blocksY, int blocksZ); + +int vk_submit_command_buffer(void *user_context, VkQueue queue, VkCommandBuffer command_buffer); + +// -- Scalar Uniform Buffer +bool vk_needs_scalar_uniform_buffer(void *user_context, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[]); + +size_t vk_estimate_scalar_uniform_buffer_size(void *user_context, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[]); + +MemoryRegion *vk_create_scalar_uniform_buffer(void *user_context, + VulkanMemoryAllocator *allocator, + size_t scalar_buffer_size); + +int vk_update_scalar_uniform_buffer(void *user_context, + VulkanMemoryAllocator *allocator, + MemoryRegion *region, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[]); + +int vk_destroy_scalar_uniform_buffer(void *user_context, VulkanMemoryAllocator *allocator, + MemoryRegion *scalar_args_region); +// -- Descriptor Pool +int vk_create_descriptor_pool(void *user_context, + VulkanMemoryAllocator *allocator, + uint32_t uniform_buffer_count, + uint32_t storage_buffer_count, + VkDescriptorPool *descriptor_pool); + +int vk_destroy_descriptor_pool(void *user_context, + VulkanMemoryAllocator *allocator, + VkDescriptorPool descriptor_pool); + +// -- Descriptor Set Layout +uint32_t vk_count_bindings_for_descriptor_set(void *user_context, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[]); + +int vk_create_descriptor_set_layout(void *user_context, + VulkanMemoryAllocator *allocator, + uint32_t uniform_buffer_count, + uint32_t storage_buffer_count, + VkDescriptorSetLayout *layout); + +int vk_destroy_descriptor_set_layout(void *user_context, + VulkanMemoryAllocator *allocator, + VkDescriptorSetLayout descriptor_set_layout); + +// -- Descriptor Set +int vk_create_descriptor_set(void *user_context, + VulkanMemoryAllocator *allocator, + VkDescriptorSetLayout descriptor_set_layout, + VkDescriptorPool descriptor_pool, + VkDescriptorSet *descriptor_set); + +int vk_update_descriptor_set(void *user_context, + VulkanMemoryAllocator *allocator, + VkBuffer *scalar_args_buffer, + size_t uniform_buffer_count, + size_t storage_buffer_count, + size_t arg_sizes[], + void *args[], + int8_t arg_is_buffer[], + VkDescriptorSet descriptor_set); + +// -- Pipeline Layout +int vk_create_pipeline_layout(void *user_context, + VulkanMemoryAllocator *allocator, + uint32_t descriptor_set_count, + VkDescriptorSetLayout *descriptor_set_layouts, + VkPipelineLayout *pipeline_layout); + +int vk_destroy_pipeline_layout(void *user_context, + VulkanMemoryAllocator *allocator, + VkPipelineLayout pipeline_layout); +// -- Compute Pipeline +int vk_create_compute_pipeline(void *user_context, + VulkanMemoryAllocator *allocator, + const char *pipeline_name, + VkShaderModule shader_module, + VkPipelineLayout pipeline_layout, + VkSpecializationInfo 
*specialization_info,
+                               VkPipeline *compute_pipeline);
+
+int vk_setup_compute_pipeline(void *user_context,
+                              VulkanMemoryAllocator *allocator,
+                              VulkanShaderBinding *shader_bindings,
+                              VulkanDispatchData *dispatch_data,
+                              VkShaderModule shader_module,
+                              VkPipelineLayout pipeline_layout,
+                              VkPipeline *compute_pipeline);
+
+int vk_destroy_compute_pipeline(void *user_context,
+                                VulkanMemoryAllocator *allocator,
+                                VkPipeline compute_pipeline);
+
+// -- Shader Module
+VulkanShaderBinding *vk_decode_shader_bindings(void *user_context, VulkanMemoryAllocator *allocator,
+                                               const uint32_t *module_ptr, uint32_t module_size);
+
+VulkanCompilationCacheEntry *vk_compile_shader_module(void *user_context, VulkanMemoryAllocator *allocator,
+                                                      const char *src, int size);
+
+int vk_destroy_shader_modules(void *user_context, VulkanMemoryAllocator *allocator);
+
+// -- Copy Buffer
+int vk_do_multidimensional_copy(void *user_context, VkCommandBuffer command_buffer,
+                                const device_copy &c, uint64_t src_offset, uint64_t dst_offset,
+                                int d, bool from_host, bool to_host);
+
+// --------------------------------------------------------------------------
+// Errors
+// --------------------------------------------------------------------------
+
+// Returns the corresponding string for a given vulkan error code
+const char *vk_get_error_name(VkResult error) {
+    switch (error) {
+    case VK_SUCCESS:
+        return "VK_SUCCESS";
+    case VK_NOT_READY:
+        return "VK_NOT_READY";
+    case VK_TIMEOUT:
+        return "VK_TIMEOUT";
+    case VK_EVENT_SET:
+        return "VK_EVENT_SET";
+    case VK_EVENT_RESET:
+        return "VK_EVENT_RESET";
+    case VK_INCOMPLETE:
+        return "VK_INCOMPLETE";
+    case VK_ERROR_OUT_OF_HOST_MEMORY:
+        return "VK_ERROR_OUT_OF_HOST_MEMORY";
+    case VK_ERROR_OUT_OF_DEVICE_MEMORY:
+        return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
+    case VK_ERROR_INITIALIZATION_FAILED:
+        return "VK_ERROR_INITIALIZATION_FAILED";
+    case VK_ERROR_DEVICE_LOST:
+        return "VK_ERROR_DEVICE_LOST";
+    case VK_ERROR_MEMORY_MAP_FAILED:
+        return "VK_ERROR_MEMORY_MAP_FAILED";
+    case VK_ERROR_LAYER_NOT_PRESENT:
+        return "VK_ERROR_LAYER_NOT_PRESENT";
+    case VK_ERROR_EXTENSION_NOT_PRESENT:
+        return "VK_ERROR_EXTENSION_NOT_PRESENT";
+    case VK_ERROR_FEATURE_NOT_PRESENT:
+        return "VK_ERROR_FEATURE_NOT_PRESENT";
+    case VK_ERROR_INCOMPATIBLE_DRIVER:
+        return "VK_ERROR_INCOMPATIBLE_DRIVER";
+    case VK_ERROR_TOO_MANY_OBJECTS:
+        return "VK_ERROR_TOO_MANY_OBJECTS";
+    case VK_ERROR_FORMAT_NOT_SUPPORTED:
+        return "VK_ERROR_FORMAT_NOT_SUPPORTED";
+    case VK_ERROR_FRAGMENTED_POOL:
+        return "VK_ERROR_FRAGMENTED_POOL";
+    case VK_ERROR_SURFACE_LOST_KHR:
+        return "VK_ERROR_SURFACE_LOST_KHR";
+    case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
+        return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR";
+    case VK_SUBOPTIMAL_KHR:
+        return "VK_SUBOPTIMAL_KHR";
+    case VK_ERROR_OUT_OF_DATE_KHR:
+        return "VK_ERROR_OUT_OF_DATE_KHR";
+    case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
+        return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR";
+    case VK_ERROR_VALIDATION_FAILED_EXT:
+        return "VK_ERROR_VALIDATION_FAILED_EXT";
+    case VK_ERROR_INVALID_SHADER_NV:
+        return "VK_ERROR_INVALID_SHADER_NV";
+    case VK_ERROR_OUT_OF_POOL_MEMORY_KHR:
+        return "VK_ERROR_OUT_OF_POOL_MEMORY_KHR";
+    case VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR:
+        return "VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR";
+    default:
+        return "<Unknown Vulkan Result Code>";
+    }
+}
+
+// --------------------------------------------------------------------------
+
+}  // namespace
+}  // namespace Vulkan
+}  // namespace Internal
+}  // namespace Runtime
+}  // namespace Halide
+
+#endif  // HALIDE_RUNTIME_VULKAN_INTERNAL_H
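Scalar (non-buffer) arguments reach the kernel through the scalar uniform buffer helpers declared above. As a rough illustration of the (arg_sizes, args, arg_is_buffer) convention they consume, the sketch below flattens non-buffer args into a byte buffer; pack_scalar_args is a made-up name, the zero-size terminator follows Halide's usual GPU argument convention, and the 4-byte alignment is an assumption for illustration rather than the runtime's actual packing rule:

    #include <stdint.h>
    #include <string.h>

    // Sketch: copy every scalar argument into a contiguous buffer, skipping
    // device buffers (those are bound through descriptors instead).
    size_t pack_scalar_args(uint8_t *dst, size_t dst_size,
                            size_t arg_sizes[], void *args[], int8_t arg_is_buffer[]) {
        size_t offset = 0;
        for (size_t i = 0; arg_sizes[i] != 0; i++) {  // zero size terminates the list
            if (arg_is_buffer[i]) {
                continue;  // bound via the descriptor set, not the uniform buffer
            }
            size_t aligned = (arg_sizes[i] + 3) & ~(size_t)3;  // assumed 4-byte alignment
            if (offset + aligned > dst_size) {
                return 0;  // caller under-estimated the buffer size
            }
            memcpy(dst + offset, args[i], arg_sizes[i]);
            offset += aligned;
        }
        return offset;  // bytes written
    }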
b/src/runtime/vulkan_memory.h new file mode 100644 index 000000000000..c925538265dc --- /dev/null +++ b/src/runtime/vulkan_memory.h @@ -0,0 +1,1219 @@ +#ifndef HALIDE_RUNTIME_VULKAN_MEMORY_H +#define HALIDE_RUNTIME_VULKAN_MEMORY_H + +#include "internal/block_allocator.h" +#include "internal/memory_resources.h" +#include "vulkan_internal.h" + +// Uncomment to enable verbose memory allocation debugging +// #define HL_VK_DEBUG_MEM 1 + +namespace Halide { +namespace Runtime { +namespace Internal { +namespace Vulkan { + +// -------------------------------------------------------------------------- + +// Enable external client to override Vulkan allocation callbacks (if they so desire) +WEAK ScopedSpinLock::AtomicFlag custom_allocation_callbacks_lock = 0; +WEAK const VkAllocationCallbacks *custom_allocation_callbacks = nullptr; // nullptr => use Vulkan runtime implementation + +// -------------------------------------------------------------------------- + +// Runtime configuration parameters to adjust the behaviour of the block allocator +struct VulkanMemoryConfig { + size_t maximum_pool_size = 0; //< Maximum number of bytes to allocate for the entire pool (including all blocks). Specified in bytes. Zero means no constraint + size_t minimum_block_size = 32 * 1024 * 1024; //< Default block size is 32MB + size_t maximum_block_size = 0; //< Specified in bytes. Zero means no constraint + size_t maximum_block_count = 0; //< Maximum number of blocks to allocate. Zero means no constraint + size_t nearest_multiple = 32; //< Always round up the requested region sizes to the given integer value. Zero means no constraint +}; +WEAK VulkanMemoryConfig memory_allocator_config; + +// -------------------------------------------------------------------------- + +/** Vulkan Memory Allocator class interface for managing large + * memory requests stored as contiguous blocks of memory, which + * are then sub-allocated into smaller regions of + * memory to avoid the excessive cost of vkAllocate and the limited + * number of available allocation calls through the API. 
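 + *
 + * A rough usage sketch (illustrative only, with hypothetical sizes; error
 + * handling is omitted, and `system_allocator` is assumed to be a
 + * SystemMemoryAllocatorFns like the one defined later in this file):
 + *
 + *     VulkanMemoryAllocator *allocator = VulkanMemoryAllocator::create(
 + *         user_context, VulkanMemoryAllocator::default_config(),
 + *         device, physical_device, system_allocator);
 + *
 + *     MemoryRequest request = {0};
 + *     request.size = 4096;  // hypothetical allocation size (in bytes)
 + *     MemoryRegion *region = allocator->reserve(user_context, request);
 + *
 + *     void *host_ptr = allocator->map(user_context, region);
 + *     // ... read/write through host_ptr ...
 + *     allocator->unmap(user_context, region);
 + *
 + *     allocator->release(user_context, region);  // cache the region for reuse
 + *     VulkanMemoryAllocator::destroy(user_context, allocator);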
+ */
+class VulkanMemoryAllocator {
+public:
+    // disable copy constructors and assignment
+    VulkanMemoryAllocator(const VulkanMemoryAllocator &) = delete;
+    VulkanMemoryAllocator &operator=(const VulkanMemoryAllocator &) = delete;
+
+    // disable non-factory construction
+    VulkanMemoryAllocator() = delete;
+    ~VulkanMemoryAllocator() = delete;
+
+    // Factory methods for creation / destruction
+    static VulkanMemoryAllocator *create(void *user_context, const VulkanMemoryConfig &config,
+                                         VkDevice dev, VkPhysicalDevice phys_dev,
+                                         const SystemMemoryAllocatorFns &system_allocator,
+                                         const VkAllocationCallbacks *alloc_callbacks = nullptr);
+
+    static int destroy(void *user_context, VulkanMemoryAllocator *allocator);
+
+    // Public interface methods
+    MemoryRegion *reserve(void *user_context, MemoryRequest &request);
+    int release(void *user_context, MemoryRegion *region);  //< unmark and cache the region for reuse
+    int reclaim(void *user_context, MemoryRegion *region);  //< free the region and consolidate
+    int retain(void *user_context, MemoryRegion *region);   //< retain the region and increase its use count
+    bool collect(void *user_context);                       //< returns true if any blocks were removed
+    int release(void *user_context);
+    int destroy(void *user_context);
+
+    void *map(void *user_context, MemoryRegion *region);
+    int unmap(void *user_context, MemoryRegion *region);
+    MemoryRegion *create_crop(void *user_context, MemoryRegion *region, uint64_t offset);
+    int destroy_crop(void *user_context, MemoryRegion *region);
+    MemoryRegion *owner_of(void *user_context, MemoryRegion *region);
+
+    VkDevice current_device() const {
+        return this->device;
+    }
+    VkPhysicalDevice current_physical_device() const {
+        return this->physical_device;
+    }
+    const VkAllocationCallbacks *callbacks() const {
+        return this->alloc_callbacks;
+    }
+
+    static const VulkanMemoryConfig &default_config();
+
+    static int allocate_block(void *instance_ptr, MemoryBlock *block);
+    static int deallocate_block(void *instance_ptr, MemoryBlock *block);
+
+    static int allocate_region(void *instance_ptr, MemoryRegion *region);
+    static int deallocate_region(void *instance_ptr, MemoryRegion *region);
+
+    size_t bytes_allocated_for_blocks() const;
+    size_t blocks_allocated() const;
+
+    size_t bytes_allocated_for_regions() const;
+    size_t regions_allocated() const;
+
+private:
+    static constexpr uint32_t invalid_usage_flags = uint32_t(-1);
+    static constexpr uint32_t invalid_memory_type = uint32_t(VK_MAX_MEMORY_TYPES);
+
+    // Initializes a new instance
+    int initialize(void *user_context, const VulkanMemoryConfig &config,
+                   VkDevice dev, VkPhysicalDevice phys_dev,
+                   const SystemMemoryAllocatorFns &system_allocator,
+                   const VkAllocationCallbacks *alloc_callbacks = nullptr);
+
+    uint32_t select_memory_usage(void *user_context, MemoryProperties properties) const;
+
+    uint32_t select_memory_type(void *user_context,
+                                VkPhysicalDevice physical_device,
+                                MemoryProperties properties,
+                                uint32_t required_flags) const;
+
+    size_t block_byte_count = 0;
+    size_t block_count = 0;
+    size_t region_byte_count = 0;
+    size_t region_count = 0;
+    void *owner_context = nullptr;
+    VulkanMemoryConfig config;
+    VkDevice device = nullptr;
+    VkPhysicalDevice physical_device = nullptr;
+    VkPhysicalDeviceLimits physical_device_limits = {};
+    const VkAllocationCallbacks *alloc_callbacks = nullptr;
+    BlockAllocator *block_allocator = nullptr;
+};
+
+VulkanMemoryAllocator *VulkanMemoryAllocator::create(void *user_context,
+                                                     const VulkanMemoryConfig &cfg, VkDevice dev,
VkPhysicalDevice phys_dev,
+                                                     const SystemMemoryAllocatorFns &system_allocator,
+                                                     const VkAllocationCallbacks *alloc_callbacks) {
+
+    if (system_allocator.allocate == nullptr) {
+        error(user_context) << "VulkanBlockAllocator: Unable to create instance! Missing system allocator interface!\n";
+        return nullptr;
+    }
+
+    VulkanMemoryAllocator *result = reinterpret_cast<VulkanMemoryAllocator *>(
+        system_allocator.allocate(user_context, sizeof(VulkanMemoryAllocator)));
+
+    if (result == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Failed to create instance! Out of memory!\n";
+        return nullptr;  // caller must handle error case for out-of-memory
+    }
+
+    result->initialize(user_context, cfg, dev, phys_dev, system_allocator, alloc_callbacks);
+    return result;
+}
+
+int VulkanMemoryAllocator::destroy(void *user_context, VulkanMemoryAllocator *instance) {
+    if (instance == nullptr) {
+        error(user_context) << "VulkanBlockAllocator: Unable to destroy instance! Invalid instance pointer!\n";
+        return halide_error_code_internal_error;
+    }
+    const BlockAllocator::MemoryAllocators &allocators = instance->block_allocator->current_allocators();
+    instance->destroy(user_context);
+    BlockAllocator::destroy(user_context, instance->block_allocator);
+    if (allocators.system.deallocate == nullptr) {
+        error(user_context) << "VulkanBlockAllocator: Unable to destroy instance! Missing system allocator interface!\n";
+        return halide_error_code_internal_error;
+    }
+    allocators.system.deallocate(user_context, instance);
+    return halide_error_code_success;
+}
+
+int VulkanMemoryAllocator::initialize(void *user_context,
+                                      const VulkanMemoryConfig &cfg, VkDevice dev, VkPhysicalDevice phys_dev,
+                                      const SystemMemoryAllocatorFns &system_allocator,
+                                      const VkAllocationCallbacks *callbacks) {
+
+    owner_context = user_context;
+    config = cfg;
+    device = dev;
+    physical_device = phys_dev;
+    alloc_callbacks = callbacks;
+    region_count = 0;
+    region_byte_count = 0;
+    block_count = 0;
+    block_byte_count = 0;
+    BlockAllocator::MemoryAllocators allocators;
+    allocators.system = system_allocator;
+    allocators.block = {VulkanMemoryAllocator::allocate_block, VulkanMemoryAllocator::deallocate_block};
+    allocators.region = {VulkanMemoryAllocator::allocate_region, VulkanMemoryAllocator::deallocate_region};
+    BlockAllocator::Config block_allocator_config = {0};
+    block_allocator_config.maximum_pool_size = cfg.maximum_pool_size;
+    block_allocator_config.maximum_block_count = cfg.maximum_block_count;
+    block_allocator_config.maximum_block_size = cfg.maximum_block_size;
+    block_allocator_config.minimum_block_size = cfg.minimum_block_size;
+    block_allocator = BlockAllocator::create(user_context, block_allocator_config, allocators);
+    if (block_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Failed to create BlockAllocator! Out of memory?!\n";
+        return halide_error_code_out_of_memory;
+    }
+
+    // get the physical device properties to determine limits and allocation requirements
+    VkPhysicalDeviceProperties physical_device_properties = {0};
+    memset(&physical_device_limits, 0, sizeof(VkPhysicalDeviceLimits));
+    vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties);
+    memcpy(&physical_device_limits, &(physical_device_properties.limits), sizeof(VkPhysicalDeviceLimits));
+    return halide_error_code_success;
+}
+
+MemoryRegion *VulkanMemoryAllocator::reserve(void *user_context, MemoryRequest &request) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Reserving memory ("
+                   << "user_context=" << user_context << " "
+                   << "block_allocator=" << (void *)(block_allocator) << " "
+                   << "request_size=" << (uint32_t)(request.size) << " "
+                   << "device=" << (void *)(device) << " "
+                   << "physical_device=" << (void *)(physical_device) << ") ...\n";
+#endif
+
+    if ((device == nullptr) || (physical_device == nullptr)) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to reserve memory! Invalid device handle!\n";
+        return nullptr;
+    }
+
+    if (block_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to reserve memory! Invalid block allocator!\n";
+        return nullptr;
+    }
+
+    return block_allocator->reserve(this, request);
+}
+
+void *VulkanMemoryAllocator::map(void *user_context, MemoryRegion *region) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Mapping region ("
+                   << "user_context=" << user_context << " "
+                   << "device=" << (void *)(device) << " "
+                   << "physical_device=" << (void *)(physical_device) << " "
+                   << "region=" << (void *)(region) << " "
+                   << "region_size=" << (uint32_t)region->size << " "
+                   << "region_offset=" << (uint32_t)region->offset << " "
+                   << "crop_offset=" << (uint32_t)region->range.head_offset << ") ...\n";
+#endif
+    if ((device == nullptr) || (physical_device == nullptr)) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to map memory! Invalid device handle!\n";
+        return nullptr;
+    }
+
+    if (block_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to map memory! Invalid block allocator!\n";
+        return nullptr;
+    }
+
+    MemoryRegion *owner = owner_of(user_context, region);
+    RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, owner);
+    if (region_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to map region! Invalid region allocator handle!\n";
+        return nullptr;  // NOTE: caller must handle nullptr
+    }
+
+    BlockResource *block_resource = region_allocator->block_resource();
+    if (block_resource == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to map region! Invalid block resource handle!\n";
+        return nullptr;  // NOTE: caller must handle nullptr
+    }
+
+    VkDeviceMemory *device_memory = reinterpret_cast<VkDeviceMemory *>(block_resource->memory.handle);
+    if (device_memory == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to map region! Invalid device memory handle!\n";
+        return nullptr;  // NOTE: caller must handle nullptr
+    }
+
+    void *mapped_ptr = nullptr;
+    VkDeviceSize memory_offset = region->offset + region->range.head_offset;
+    VkDeviceSize memory_size = region->size - region->range.tail_offset - region->range.head_offset;
+    if (((double)region->size - (double)region->range.tail_offset - (double)region->range.head_offset) <= 0.0) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to map region! Invalid memory range!\n";
+        return nullptr;
+    }
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: MapMemory ("
+                   << "user_context=" << user_context << "\n"
+                   << "  region_size=" << (uint32_t)region->size << "\n"
+                   << "  region_offset=" << (uint32_t)region->offset << "\n"
+                   << "  region_range.head_offset=" << (uint32_t)region->range.head_offset << "\n"
+                   << "  region_range.tail_offset=" << (uint32_t)region->range.tail_offset << "\n"
+                   << "  memory_offset=" << (uint32_t)memory_offset << "\n"
+                   << "  memory_size=" << (uint32_t)memory_size << ") ...\n";
+#endif
+
+    VkResult result = vkMapMemory(device, *device_memory, memory_offset, memory_size, 0, (void **)(&mapped_ptr));
+    if (result != VK_SUCCESS) {
+        error(user_context) << "VulkanMemoryAllocator: Mapping region failed! vkMapMemory returned error code: " << vk_get_error_name(result) << "\n";
+        return nullptr;
+    }
+
+    return mapped_ptr;
+}
+
+int VulkanMemoryAllocator::unmap(void *user_context, MemoryRegion *region) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Unmapping region ("
+                   << "user_context=" << user_context << " "
+                   << "device=" << (void *)(device) << " "
+                   << "physical_device=" << (void *)(physical_device) << " "
+                   << "region=" << (void *)(region) << " "
+                   << "region_size=" << (uint32_t)region->size << " "
+                   << "region_offset=" << (uint32_t)region->offset << " "
+                   << "crop_offset=" << (uint32_t)region->range.head_offset << ") ...\n";
+#endif
+    if ((device == nullptr) || (physical_device == nullptr)) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to unmap region! Invalid device handle!\n";
+        return halide_error_code_generic_error;
+    }
+
+    MemoryRegion *owner = owner_of(user_context, region);
+    RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, owner);
+    if (region_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to unmap region! Invalid region allocator handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    BlockResource *block_resource = region_allocator->block_resource();
+    if (block_resource == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to unmap region! Invalid block resource handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    VkDeviceMemory *device_memory = reinterpret_cast<VkDeviceMemory *>(block_resource->memory.handle);
+    if (device_memory == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to unmap region! Invalid device memory handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    vkUnmapMemory(device, *device_memory);
+    return halide_error_code_success;
+}
+
+MemoryRegion *VulkanMemoryAllocator::create_crop(void *user_context, MemoryRegion *region, uint64_t offset) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Cropping region ("
+                   << "user_context=" << user_context << " "
+                   << "device=" << (void *)(device) << " "
+                   << "physical_device=" << (void *)(physical_device) << " "
+                   << "region=" << (void *)(region) << " "
+                   << "region_size=" << (uint32_t)region->size << " "
+                   << "region_offset=" << (uint32_t)region->offset << " "
+                   << "crop_offset=" << (int64_t)offset << ") ...\n";
+#endif
+    if ((device == nullptr) || (physical_device == nullptr)) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to crop region! Invalid device handle!\n";
+        return nullptr;
+    }
+
+    MemoryRegion *owner = owner_of(user_context, region);
+    RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, owner);
+    if (region_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to crop region! Invalid region allocator handle!\n";
+        return nullptr;  // NOTE: caller must handle nullptr
+    }
+
+    // increment usage count
+    int error_code = region_allocator->retain(this, owner);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to crop region! Failed to retain memory region!\n";
+        return nullptr;  // NOTE: caller must handle nullptr
+    }
+
+    // create a new region to return, and copy all the other region's properties
+    const BlockAllocator::MemoryAllocators &allocators = block_allocator->current_allocators();
+    if (allocators.system.allocate == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to create crop! Missing system allocator interface!\n";
+        return nullptr;
+    }
+
+    MemoryRegion *memory_region = reinterpret_cast<MemoryRegion *>(
+        allocators.system.allocate(user_context, sizeof(MemoryRegion)));
+
+    if (memory_region == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Failed to allocate memory region! Out of memory!\n";
+        return nullptr;
+    }
+    memcpy(memory_region, owner, sizeof(MemoryRegion));
+
+    // point the handle to the owner of the allocated region, and update the head offset
+    memory_region->is_owner = false;
+    memory_region->handle = (void *)owner;
+    memory_region->range.head_offset = owner->range.head_offset + offset;
+    return memory_region;
+}
+
+int VulkanMemoryAllocator::destroy_crop(void *user_context, MemoryRegion *region) {
+    if (region == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Failed to destroy crop! Invalid memory region!\n";
+        return halide_error_code_generic_error;
+    }
+
+    MemoryRegion *owner = owner_of(user_context, region);
+    RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, owner);
+    if (region_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to destroy crop region! Invalid region allocator handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // decrement usage count
+    int error_code = region_allocator->release(this, owner);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "VulkanBlockAllocator: Unable to destroy crop region! Region allocator failed to release memory region!\n";
+        return error_code;
+    }
+
+    // discard the copied region struct
+    const BlockAllocator::MemoryAllocators &allocators = block_allocator->current_allocators();
+    if (allocators.system.deallocate == nullptr) {
+        error(user_context) << "VulkanBlockAllocator: Unable to destroy crop region! Missing system allocator interface!\n";
+        return halide_error_code_internal_error;
+    }
+    allocators.system.deallocate(user_context, region);
+    return halide_error_code_success;
+}
+
+MemoryRegion *VulkanMemoryAllocator::owner_of(void *user_context, MemoryRegion *region) {
+    if (region->is_owner) {
+        return region;
+    } else {
+        // If this is a cropped region, use the handle to retrieve the owner of the allocation
+        return reinterpret_cast<MemoryRegion *>(region->handle);
+    }
+}
+
+int VulkanMemoryAllocator::release(void *user_context, MemoryRegion *region) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Releasing region ("
+                   << "user_context=" << user_context << " "
+                   << "region=" << (void *)(region) << " "
+                   << "size=" << (uint32_t)region->size << " "
+                   << "offset=" << (uint32_t)region->offset << ") ...\n";
+#endif
+    if ((device == nullptr) || (physical_device == nullptr)) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to release region! Invalid device handle!\n";
+        return halide_error_code_generic_error;
+    }
+    if (block_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to release region! Invalid block allocator!\n";
+        return halide_error_code_generic_error;
+    }
+    return block_allocator->release(this, region);
+}
+
+int VulkanMemoryAllocator::reclaim(void *user_context, MemoryRegion *region) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Reclaiming region ("
+                   << "user_context=" << user_context << " "
+                   << "region=" << (void *)(region) << " "
+                   << "size=" << (uint32_t)region->size << " "
+                   << "offset=" << (uint32_t)region->offset << ") ...\n";
+#endif
+    if ((device == nullptr) || (physical_device == nullptr)) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to reclaim region! Invalid device handle!\n";
+        return halide_error_code_generic_error;
+    }
+    if (block_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to reclaim region! Invalid block allocator!\n";
+        return halide_error_code_generic_error;
+    }
+    return block_allocator->reclaim(this, region);
+}
+
+int VulkanMemoryAllocator::retain(void *user_context, MemoryRegion *region) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Retaining region ("
+                   << "user_context=" << user_context << " "
+                   << "region=" << (void *)(region) << " "
+                   << "size=" << (uint32_t)region->size << " "
+                   << "offset=" << (uint32_t)region->offset << ") ...\n";
+#endif
+    if ((device == nullptr) || (physical_device == nullptr)) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to retain region! Invalid device handle!\n";
+        return halide_error_code_generic_error;
+    }
+    if (block_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to retain region! Invalid block allocator!\n";
+        return halide_error_code_generic_error;
+    }
+    return block_allocator->retain(this, region);
+}
+
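+// Note on the region lifecycle implemented above: reserve() hands out a
+// sub-allocated region, retain() increments its use count, release() unmarks
+// the region and caches it for reuse, and reclaim() frees it outright.
+// collect() (below) then asks the underlying BlockAllocator to destroy any
+// blocks that are no longer in use, returning true if anything was removed.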
+bool VulkanMemoryAllocator::collect(void *user_context) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Collecting unused memory ("
+                   << "user_context=" << user_context << ") ... \n";
+#endif
+    if ((device == nullptr) || (physical_device == nullptr) || (block_allocator == nullptr)) {
+        return false;
+    }
+    return block_allocator->collect(this);
+}
+
+int VulkanMemoryAllocator::release(void *user_context) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Releasing block allocator ("
+                   << "user_context=" << user_context << ") ... \n";
+#endif
+    if ((device == nullptr) || (physical_device == nullptr)) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to release allocator! Invalid device handle!\n";
+        return halide_error_code_generic_error;
+    }
+    if (block_allocator == nullptr) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to release allocator! Invalid block allocator!\n";
+        return halide_error_code_generic_error;
+    }
+
+    return block_allocator->release(this);
+}
+
+int VulkanMemoryAllocator::destroy(void *user_context) {
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Destroying allocator ("
+                   << "user_context=" << user_context << ") ... \n";
+#endif
+    if (block_allocator != nullptr) {
+        block_allocator->destroy(this);
+    }
+    region_count = 0;
+    region_byte_count = 0;
+    block_count = 0;
+    block_byte_count = 0;
+    return halide_error_code_success;
+}
+
+const VulkanMemoryConfig &
+VulkanMemoryAllocator::default_config() {
+    static VulkanMemoryConfig result;
+    return result;
+}
+
+// --
+
+int VulkanMemoryAllocator::allocate_block(void *instance_ptr, MemoryBlock *block) {
+    VulkanMemoryAllocator *instance = reinterpret_cast<VulkanMemoryAllocator *>(instance_ptr);
+    if (instance == nullptr) {
+        return halide_error_code_internal_error;
+    }
+
+    void *user_context = instance->owner_context;
+    if ((instance->device == nullptr) || (instance->physical_device == nullptr)) {
+        error(user_context) << "VulkanBlockAllocator: Unable to allocate block! Invalid device handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    if (block == nullptr) {
+        error(user_context) << "VulkanBlockAllocator: Unable to allocate block! Invalid pointer!\n";
+        return halide_error_code_internal_error;
+    }
+
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Allocating block ("
+                   << "user_context=" << user_context << " "
+                   << "block=" << (void *)(block) << " "
+                   << "size=" << (uint64_t)block->size << ", "
+                   << "dedicated=" << (block->dedicated ? "true" : "false") << " "
+                   << "usage=" << halide_memory_usage_name(block->properties.usage) << " "
+                   << "caching=" << halide_memory_caching_name(block->properties.caching) << " "
+                   << "visibility=" << halide_memory_visibility_name(block->properties.visibility) << ")\n";
+#endif
+
+    // Find an appropriate memory type given the flags
+    uint32_t memory_type = instance->select_memory_type(user_context, instance->physical_device, block->properties, 0);
+    if (memory_type == invalid_memory_type) {
+        error(user_context) << "VulkanMemoryAllocator: Unable to find appropriate memory type for device!\n";
+        return halide_error_code_generic_error;
+    }
+
+    // Allocate memory
+    VkMemoryAllocateInfo alloc_info = {
+        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,  // struct type
+        nullptr,                                 // struct extending this
+        block->size,                             // size of allocation in bytes
+        memory_type                              // memory type index from physical device
+    };
+
+    VkDeviceMemory *device_memory = (VkDeviceMemory *)vk_host_malloc(nullptr, sizeof(VkDeviceMemory), 0, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, instance->alloc_callbacks);
+    if (device_memory == nullptr) {
+        debug(nullptr) << "VulkanBlockAllocator: Unable to allocate block! Failed to allocate device memory handle!\n";
+        return halide_error_code_out_of_memory;
+    }
+
+    VkResult result = vkAllocateMemory(instance->device, &alloc_info, instance->alloc_callbacks, device_memory);
+    if (result != VK_SUCCESS) {
+        debug(nullptr) << "VulkanMemoryAllocator: Allocation failed! vkAllocateMemory returned: " << vk_get_error_name(result) << "\n";
+        return halide_error_code_device_malloc_failed;
+    }
+#ifdef DEBUG_RUNTIME
+    debug(nullptr) << "vkAllocateMemory: Allocated memory for device region (" << (uint64_t)block->size << " bytes) ...\n";
+#endif
+
+    uint32_t usage_flags = instance->select_memory_usage(user_context, block->properties);
+
+    VkBufferCreateInfo create_info = {
+        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,  // struct type
+        nullptr,                               // struct extending this
+        0,                                     // create flags
+        sizeof(uint32_t),                      // buffer size (in bytes)
+        usage_flags,                           // buffer usage flags
+        VK_SHARING_MODE_EXCLUSIVE,             // sharing mode
+        0, nullptr};
+
+    // Create a buffer to determine alignment requirements
+    VkBuffer buffer = {0};
+    result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, &buffer);
+    if (result != VK_SUCCESS) {
+        debug(nullptr) << "VulkanMemoryAllocator: Failed to create buffer!\n\t"
+                       << "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n";
+        return halide_error_code_device_malloc_failed;
+    }
+
+    VkMemoryRequirements memory_requirements = {0};
+    vkGetBufferMemoryRequirements(instance->device, buffer, &memory_requirements);
+    vkDestroyBuffer(instance->device, buffer, instance->alloc_callbacks);
+
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Block allocated ("
+                   << "size=" << (uint32_t)block->size << ", "
+                   << "alignment=" << (uint32_t)memory_requirements.alignment << ", "
+                   << "uniform_buffer_offset_alignment=" << (uint32_t)instance->physical_device_limits.minUniformBufferOffsetAlignment << ", "
+                   << "storage_buffer_offset_alignment=" << (uint32_t)instance->physical_device_limits.minStorageBufferOffsetAlignment << ", "
+                   << "dedicated=" << (block->dedicated ? "true" : "false") << ")\n";
+#endif
+
+    if (usage_flags & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) {
+        block->properties.alignment = instance->physical_device_limits.minStorageBufferOffsetAlignment;
+    } else if (usage_flags & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
+        block->properties.alignment = instance->physical_device_limits.minUniformBufferOffsetAlignment;
+    } else {
+        block->properties.alignment = memory_requirements.alignment;
+    }
+    block->handle = (void *)device_memory;
+    instance->block_byte_count += block->size;
+    instance->block_count++;
+    return halide_error_code_success;
+}
+
+int VulkanMemoryAllocator::deallocate_block(void *instance_ptr, MemoryBlock *block) {
+    VulkanMemoryAllocator *instance = reinterpret_cast<VulkanMemoryAllocator *>(instance_ptr);
+    if (instance == nullptr) {
+        return halide_error_code_internal_error;
+    }
+
+    void *user_context = instance->owner_context;
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Deallocating block ("
+                   << "user_context=" << user_context << " "
+                   << "block=" << (void *)(block) << ") ... \n";
+#endif
+
+    if ((instance->device == nullptr) || (instance->physical_device == nullptr)) {
+        error(user_context) << "VulkanBlockAllocator: Unable to deallocate block! Invalid device handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    if (block == nullptr) {
+        error(user_context) << "VulkanBlockAllocator: Unable to deallocate block! Invalid pointer!\n";
+        return halide_error_code_internal_error;
+    }
+
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanBlockAllocator: deallocating block ("
+                   << "size=" << (uint32_t)block->size << ", "
+                   << "dedicated=" << (block->dedicated ? "true" : "false") << " "
+                   << "usage=" << halide_memory_usage_name(block->properties.usage) << " "
+                   << "caching=" << halide_memory_caching_name(block->properties.caching) << " "
+                   << "visibility=" << halide_memory_visibility_name(block->properties.visibility) << ")\n";
+#endif
+
+    if (block->handle == nullptr) {
+        error(user_context) << "VulkanBlockAllocator: Unable to deallocate block! Invalid handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    VkDeviceMemory *device_memory = reinterpret_cast<VkDeviceMemory *>(block->handle);
+    if (device_memory == nullptr) {
+        error(user_context) << "VulkanBlockAllocator: Unable to deallocate block! Invalid device memory handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    vkFreeMemory(instance->device, *device_memory, instance->alloc_callbacks);
+#ifdef DEBUG_RUNTIME
+    debug(nullptr) << "vkFreeMemory: Deallocated memory for device region (" << (uint64_t)block->size << " bytes) ...\n";
+#endif
+
+    if (instance->block_count > 0) {
+        instance->block_count--;
+    } else {
+        error(nullptr) << "VulkanRegionAllocator: Block counter invalid ... resetting to zero!\n";
+        instance->block_count = 0;
+    }
+
+    if (int64_t(instance->block_byte_count) - int64_t(block->size) >= 0) {
+        instance->block_byte_count -= block->size;
+    } else {
+        error(nullptr) << "VulkanRegionAllocator: Block byte counter invalid ... resetting to zero!\n";
+        instance->block_byte_count = 0;
+    }
+
+    block->handle = nullptr;
+    vk_host_free(nullptr, device_memory, instance->alloc_callbacks);
+    device_memory = nullptr;
+    return halide_error_code_success;
+}
+
+size_t VulkanMemoryAllocator::blocks_allocated() const {
+    return block_count;
+}
+
+size_t VulkanMemoryAllocator::bytes_allocated_for_blocks() const {
+    return block_byte_count;
+}
+
+uint32_t VulkanMemoryAllocator::select_memory_type(void *user_context,
+                                                   VkPhysicalDevice physical_device,
+                                                   MemoryProperties properties,
+                                                   uint32_t required_flags) const {
+
+    uint32_t want_flags = 0;  //< preferred memory flags for requested access type
+    uint32_t need_flags = 0;  //< must have in order to enable requested access
+    switch (properties.visibility) {
+    case MemoryVisibility::HostOnly:
+        want_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+        break;
+    case MemoryVisibility::DeviceOnly:
+        need_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+        break;
+    case MemoryVisibility::DeviceToHost:
+        need_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+        want_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+        break;
+    case MemoryVisibility::HostToDevice:
+        need_flags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+        break;
+    case MemoryVisibility::DefaultVisibility:
+    case MemoryVisibility::InvalidVisibility:
+    default:
+        error(nullptr) << "VulkanMemoryAllocator: Unable to convert type! Invalid memory visibility request!\n\t"
+                       << "visibility=" << halide_memory_visibility_name(properties.visibility) << "\n";
+        return invalid_memory_type;
+    };
+
+    switch (properties.caching) {
+    case MemoryCaching::CachedCoherent:
+        if (need_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+            want_flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+        }
+        break;
+    case MemoryCaching::UncachedCoherent:
+        if (need_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+            want_flags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+        }
+        break;
+    case MemoryCaching::Cached:
+        if (need_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+            want_flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+        }
+        break;
+    case MemoryCaching::Uncached:
+    case MemoryCaching::DefaultCaching:
+        break;
+    case MemoryCaching::InvalidCaching:
+    default:
+        error(user_context) << "VulkanMemoryAllocator: Unable to convert type! Invalid memory caching request!\n\t"
+                            << "caching=" << halide_memory_caching_name(properties.caching) << "\n";
+        return invalid_memory_type;
+    };
+
+    VkPhysicalDeviceMemoryProperties device_memory_properties;
+    vkGetPhysicalDeviceMemoryProperties(physical_device, &device_memory_properties);
+
+    uint32_t result = invalid_memory_type;
+    for (uint32_t i = 0; i < device_memory_properties.memoryTypeCount; ++i) {
+
+        // if required flags are given, see if the memory type matches the requirement
+        if (required_flags) {
+            if (((required_flags >> i) & 1) == 0) {
+                continue;
+            }
+        }
+
+        const VkMemoryPropertyFlags properties = device_memory_properties.memoryTypes[i].propertyFlags;
+        if (need_flags) {
+            if ((properties & need_flags) != need_flags) {
+                continue;
+            }
+        }
+
+        if (want_flags) {
+            if ((properties & want_flags) != want_flags) {
+                continue;
+            }
+        }
+
+        result = i;
+        break;
+    }
+
+    if (result == invalid_memory_type) {
+        error(user_context) << "VulkanBlockAllocator: Failed to find appropriate memory type for given properties:\n\t"
+                            << "usage=" << halide_memory_usage_name(properties.usage) << " "
+                            << "caching=" << halide_memory_caching_name(properties.caching) << " "
+                            << "visibility=" << halide_memory_visibility_name(properties.visibility) << "\n";
+        return invalid_memory_type;
+    }
+
+    return result;
+}
+
+// --
+
+int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *region) {
+
+    VulkanMemoryAllocator *instance = reinterpret_cast<VulkanMemoryAllocator *>(instance_ptr);
+    if (instance == nullptr) {
+        return halide_error_code_internal_error;
+    }
+
+    void *user_context = instance->owner_context;
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanMemoryAllocator: Allocating region ("
+                   << "user_context=" << user_context << " "
+                   << "region=" << (void *)(region) << ") ... \n";
+#endif
+
+    if ((instance->device == nullptr) || (instance->physical_device == nullptr)) {
+        error(user_context) << "VulkanRegionAllocator: Unable to allocate region! Invalid device handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    if (region == nullptr) {
+        error(user_context) << "VulkanRegionAllocator: Unable to allocate region! Invalid pointer!\n";
+        return halide_error_code_internal_error;
+    }
+
+#if defined(HL_VK_DEBUG_MEM)
"true" : "false") << " " + << "usage=" << halide_memory_usage_name(region->properties.usage) << " " + << "caching=" << halide_memory_caching_name(region->properties.caching) << " " + << "visibility=" << halide_memory_visibility_name(region->properties.visibility) << ")\n"; +#endif + + uint32_t usage_flags = instance->select_memory_usage(user_context, region->properties); + + VkBufferCreateInfo create_info = { + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // struct type + nullptr, // struct extending this + 0, // create flags + region->size, // buffer size (in bytes) + usage_flags, // buffer usage flags + VK_SHARING_MODE_EXCLUSIVE, // sharing mode + 0, nullptr}; + + VkBuffer *buffer = (VkBuffer *)vk_host_malloc(nullptr, sizeof(VkBuffer), 0, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, instance->alloc_callbacks); + if (buffer == nullptr) { + error(user_context) << "VulkanRegionAllocator: Unable to allocate region! Failed to allocate buffer handle!\n"; + return halide_error_code_out_of_memory; + } + + VkResult result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer); + if (result != VK_SUCCESS) { + error(user_context) << "VulkanRegionAllocator: Failed to create buffer!\n\t" + << "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n"; + return halide_error_code_device_malloc_failed; + } +#ifdef DEBUG_RUNTIME + debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n"; +#endif + + RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, region); + if (region_allocator == nullptr) { + error(user_context) << "VulkanBlockAllocator: Unable to allocate region! Invalid region allocator!\n"; + return halide_error_code_internal_error; + } + + BlockResource *block_resource = region_allocator->block_resource(); + if (block_resource == nullptr) { + error(user_context) << "VulkanBlockAllocator: Unable to allocate region! Invalid block resource handle!\n"; + return halide_error_code_internal_error; + } + + VkDeviceMemory *device_memory = reinterpret_cast(block_resource->memory.handle); + if (device_memory == nullptr) { + error(user_context) << "VulkanBlockAllocator: Unable to allocate region! Invalid device memory handle!\n"; + return halide_error_code_internal_error; + } + + // Finally, bind buffer to the device memory + result = vkBindBufferMemory(instance->device, *buffer, *device_memory, region->offset); + if (result != VK_SUCCESS) { + error(user_context) << "VulkanRegionAllocator: Failed to bind buffer!\n\t" + << "vkBindBufferMemory returned: " << vk_get_error_name(result) << "\n"; + return halide_error_code_generic_error; + } + + region->handle = (void *)buffer; + region->is_owner = true; + instance->region_byte_count += region->size; + instance->region_count++; + return halide_error_code_success; +} + +int VulkanMemoryAllocator::deallocate_region(void *instance_ptr, MemoryRegion *region) { + VulkanMemoryAllocator *instance = reinterpret_cast(instance_ptr); + if (instance == nullptr) { + return halide_error_code_internal_error; + } + + void *user_context = instance->owner_context; +#if defined(HL_VK_DEBUG_MEM) + debug(nullptr) << "VulkanMemoryAllocator: Deallocating region (" + << "user_context=" << user_context << " " + << "region=" << (void *)(region) << ") ... \n"; +#endif + + if ((instance->device == nullptr) || (instance->physical_device == nullptr)) { + error(user_context) << "VulkanRegionAllocator: Unable to deallocate region! 
+        error(user_context) << "VulkanRegionAllocator: Unable to deallocate region! Invalid device handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    if (region == nullptr) {
+        error(user_context) << "VulkanRegionAllocator: Unable to deallocate region! Invalid pointer!\n";
+        return halide_error_code_internal_error;
+    }
+
+#if defined(HL_VK_DEBUG_MEM)
+    debug(nullptr) << "VulkanRegionAllocator: Deallocating region ("
+                   << "size=" << (uint32_t)region->size << ", "
+                   << "offset=" << (uint32_t)region->offset << ", "
+                   << "dedicated=" << (region->dedicated ? "true" : "false") << " "
+                   << "usage=" << halide_memory_usage_name(region->properties.usage) << " "
+                   << "caching=" << halide_memory_caching_name(region->properties.caching) << " "
+                   << "visibility=" << halide_memory_visibility_name(region->properties.visibility) << ")\n";
+#endif
+
+    if (region->handle == nullptr) {
+        error(user_context) << "VulkanRegionAllocator: Unable to deallocate region! Invalid handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    VkBuffer *buffer = reinterpret_cast<VkBuffer *>(region->handle);
+    if (buffer == nullptr) {
+        error(user_context) << "VulkanRegionAllocator: Unable to deallocate region! Invalid buffer handle!\n";
+        return halide_error_code_internal_error;
+    }
+
+    vkDestroyBuffer(instance->device, *buffer, instance->alloc_callbacks);
+#ifdef DEBUG_RUNTIME
+    debug(nullptr) << "vkDestroyBuffer: Destroyed buffer for device region (" << (uint64_t)region->size << " bytes) ...\n";
+#endif
+    region->handle = nullptr;
+    if (instance->region_count > 0) {
+        instance->region_count--;
+    } else {
+        error(nullptr) << "VulkanRegionAllocator: Region counter invalid ... resetting to zero!\n";
+        instance->region_count = 0;
+        return halide_error_code_internal_error;
+    }
+
+    if (int64_t(instance->region_byte_count) - int64_t(region->size) >= 0) {
+        instance->region_byte_count -= region->size;
+    } else {
+        error(nullptr) << "VulkanRegionAllocator: Region byte counter invalid ... resetting to zero!\n";
+        instance->region_byte_count = 0;
+        return halide_error_code_internal_error;
+    }
+    vk_host_free(nullptr, buffer, instance->alloc_callbacks);
+    buffer = nullptr;
+    return halide_error_code_success;
+}
+
+size_t VulkanMemoryAllocator::regions_allocated() const {
+    return region_count;
+}
+
+size_t VulkanMemoryAllocator::bytes_allocated_for_regions() const {
+    return region_byte_count;
+}
+
+uint32_t VulkanMemoryAllocator::select_memory_usage(void *user_context, MemoryProperties properties) const {
+    uint32_t result = 0;
+    switch (properties.usage) {
+    case MemoryUsage::UniformStorage:
+        result |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+        break;
+    case MemoryUsage::DynamicStorage:
+    case MemoryUsage::StaticStorage:
+        result |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+        break;
+    case MemoryUsage::TransferSrc:
+        result |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+        break;
+    case MemoryUsage::TransferDst:
+        result |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+        break;
+    case MemoryUsage::TransferSrcDst:
+        result |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+        break;
+    case MemoryUsage::DefaultUsage:
+    case MemoryUsage::InvalidUsage:
+    default:
+        error(user_context) << "VulkanRegionAllocator: Unable to convert type! Invalid memory usage request!\n\t"
+                            << "usage=" << halide_memory_usage_name(properties.usage) << "\n";
+        return invalid_usage_flags;
+    };
+
+    if (result == invalid_usage_flags) {
+        error(user_context) << "VulkanRegionAllocator: Failed to find appropriate memory usage for given properties:\n\t"
+                            << "usage=" << halide_memory_usage_name(properties.usage) << " "
+                            << "caching=" << halide_memory_caching_name(properties.caching) << " "
+                            << "visibility=" << halide_memory_visibility_name(properties.visibility) << "\n";
+        return invalid_usage_flags;
+    }
+
+    return result;
+}
+
+// --------------------------------------------------------------------------
+
+namespace {
+
+// --------------------------------------------------------------------------
+// Halide System allocator for host allocations
+void *vk_system_malloc(void *user_context, size_t size) {
+    return malloc(size);
+}
+
+void vk_system_free(void *user_context, void *ptr) {
+    free(ptr);
+}
+
+// Vulkan host-side allocation
+void *vk_host_malloc(void *user_context, size_t size, size_t alignment, VkSystemAllocationScope scope, const VkAllocationCallbacks *callbacks) {
+    if (callbacks) {
+        return callbacks->pfnAllocation(user_context, size, alignment, scope);
+    } else {
+        return vk_system_malloc(user_context, size);
+    }
+}
+
+void vk_host_free(void *user_context, void *ptr, const VkAllocationCallbacks *callbacks) {
+    if (callbacks) {
+        return callbacks->pfnFree(user_context, ptr);
+    } else {
+        return vk_system_free(user_context, ptr);
+    }
+}
+
+VulkanMemoryAllocator *vk_create_memory_allocator(void *user_context,
+                                                  VkDevice device,
+                                                  VkPhysicalDevice physical_device,
+                                                  const VkAllocationCallbacks *alloc_callbacks) {
+
+    SystemMemoryAllocatorFns system_allocator = {vk_system_malloc, vk_system_free};
+    VulkanMemoryConfig config = memory_allocator_config;
+
+    // Parse the allocation config string (if specified).
+    //
+    // `HL_VK_ALLOC_CONFIG=N:N:N:N:N` will tell Halide to configure the Vulkan memory
+    // allocator to use the given constraints, specified as up to five integer values
+    // separated by a `:` or `;`. These values correspond to `maximum_pool_size`,
+    // `minimum_block_size` and `maximum_block_size` (all in megabytes), followed by
+    // `maximum_block_count` and `nearest_multiple` (in bytes).
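+    //
+    // For example (hypothetical values), `HL_VK_ALLOC_CONFIG=64:16:64:8:32`
+    // would constrain the pool to 64MB, use blocks between 16MB and 64MB,
+    // allocate at most 8 blocks, and round requested region sizes up to a
+    // multiple of 32 bytes.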
+ // + const char *alloc_config = vk_get_alloc_config_internal(user_context); + if (!StringUtils::is_empty(alloc_config)) { + StringTable alloc_config_values; + alloc_config_values.parse(user_context, alloc_config, HL_VK_ENV_DELIM); + if (alloc_config_values.size() > 0) { + config.maximum_pool_size = atoi(alloc_config_values[0]) * 1024 * 1024; + print(user_context) << "Vulkan: Configuring allocator with " << (uint32_t)config.maximum_pool_size << " for maximum pool size (in bytes)\n"; + } + if (alloc_config_values.size() > 1) { + config.minimum_block_size = atoi(alloc_config_values[1]) * 1024 * 1024; + print(user_context) << "Vulkan: Configuring allocator with " << (uint32_t)config.minimum_block_size << " for minimum block size (in bytes)\n"; + } + if (alloc_config_values.size() > 2) { + config.maximum_block_size = atoi(alloc_config_values[2]) * 1024 * 1024; + print(user_context) << "Vulkan: Configuring allocator with " << (uint32_t)config.maximum_block_size << " for maximum block size (in bytes)\n"; + } + if (alloc_config_values.size() > 3) { + config.maximum_block_count = atoi(alloc_config_values[3]); + print(user_context) << "Vulkan: Configuring allocator with " << (uint32_t)config.maximum_block_count << " for maximum block count\n"; + } + if (alloc_config_values.size() > 4) { + config.nearest_multiple = atoi(alloc_config_values[4]); + print(user_context) << "Vulkan: Configuring allocator with " << (uint32_t)config.nearest_multiple << " for nearest multiple\n"; + } + } + + return VulkanMemoryAllocator::create(user_context, + config, device, physical_device, + system_allocator, alloc_callbacks); +} + +int vk_destroy_memory_allocator(void *user_context, VulkanMemoryAllocator *allocator) { + if (allocator != nullptr) { + VulkanMemoryAllocator::destroy(user_context, allocator); + allocator = nullptr; + } + return halide_error_code_success; +} + +// -------------------------------------------------------------------------- + +int vk_clear_device_buffer(void *user_context, + VulkanMemoryAllocator *allocator, + VkCommandPool command_pool, + VkQueue command_queue, + VkBuffer device_buffer) { + +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_clear_device_buffer (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "command_pool: " << (void *)command_pool << ", " + << "command_queue: " << (void *)command_queue << ", " + << "device_buffer: " << (void *)device_buffer << ")\n"; +#endif + + // create a command buffer + VkCommandBuffer command_buffer; + int error_code = vk_create_command_buffer(user_context, allocator, command_pool, &command_buffer); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to create command buffer!\n"; + return error_code; + } + + // begin the command buffer + VkCommandBufferBeginInfo command_buffer_begin_info = + { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // struct type + nullptr, // pointer to struct extending this + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, // flags + nullptr // pointer to parent command buffer + }; + + VkResult result = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info); + if (result != VK_SUCCESS) { + error(user_context) << "Vulkan: vkBeginCommandBuffer returned " << vk_get_error_name(result) << "\n"; + return halide_error_code_generic_error; + } + + // fill buffer with zero values up to the size of the buffer + vkCmdFillBuffer(command_buffer, device_buffer, 0, VK_WHOLE_SIZE, 0); + + // end the command buffer + result = 
vkEndCommandBuffer(command_buffer);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkEndCommandBuffer returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+
+    // submit the command buffer
+    VkSubmitInfo submit_info =
+        {
+            VK_STRUCTURE_TYPE_SUBMIT_INFO,  // struct type
+            nullptr,                        // pointer to struct extending this
+            0,                              // wait semaphore count
+            nullptr,                        // semaphores
+            nullptr,                        // pipeline stages where semaphore waits occur
+            1,                              // how many command buffers to execute
+            &command_buffer,                // the command buffers
+            0,                              // number of semaphores to signal
+            nullptr                         // the semaphores to signal
+        };
+
+    result = vkQueueSubmit(command_queue, 1, &submit_info, 0);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkQueueSubmit returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+
+    // wait for memset to finish
+    result = vkQueueWaitIdle(command_queue);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkQueueWaitIdle returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+
+    error_code = vk_destroy_command_buffer(user_context, allocator, command_pool, command_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to destroy command buffer!\n";
+        return error_code;
+    }
+
+    return halide_error_code_success;
+}
+
+// --------------------------------------------------------------------------
+
+} // namespace
+} // namespace Vulkan
+} // namespace Internal
+} // namespace Runtime
+} // namespace Halide
+
+// --------------------------------------------------------------------------
+
+extern "C" {
+
+// --------------------------------------------------------------------------
+
+WEAK void halide_vulkan_set_allocation_callbacks(const VkAllocationCallbacks *callbacks) {
+    using namespace Halide::Runtime::Internal::Vulkan;
+    ScopedSpinLock lock(&custom_allocation_callbacks_lock);
+    custom_allocation_callbacks = callbacks;
+}
+
+WEAK const VkAllocationCallbacks *halide_vulkan_get_allocation_callbacks(void *user_context) {
+    using namespace Halide::Runtime::Internal::Vulkan;
+    ScopedSpinLock lock(&custom_allocation_callbacks_lock);
+    return custom_allocation_callbacks;
+}
+
+// --------------------------------------------------------------------------
+
+} // extern "C"
+
+#endif // HALIDE_RUNTIME_VULKAN_MEMORY_H
diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h
new file mode 100644
index 000000000000..d4b7bf866d11
--- /dev/null
+++ b/src/runtime/vulkan_resources.h
@@ -0,0 +1,1551 @@
+#ifndef HALIDE_RUNTIME_VULKAN_RESOURCES_H
+#define HALIDE_RUNTIME_VULKAN_RESOURCES_H
+
+#include "vulkan_internal.h"
+#include "vulkan_memory.h"
+
+// --------------------------------------------------------------------------
+
+namespace Halide {
+namespace Runtime {
+namespace Internal {
+namespace Vulkan {
+
+// Defines the specialization constants used for dynamically overriding the dispatch size
+struct VulkanWorkgroupSizeBinding {
+    uint32_t constant_id[3] = {0};  // zero if unused
+};
+
+// Data used to override specialization constants for dynamic dispatching
+struct VulkanDispatchData {
+    uint32_t global_size[3] = {0};  // aka blocks
+    uint32_t local_size[3] = {0};   // aka threads
+    uint32_t shared_mem_bytes = 0;
+    VulkanWorkgroupSizeBinding local_size_binding = {};
+};
+
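+// For example (hypothetical constant IDs): a shader that declares its
+// workgroup dimensions as specialization constants 1, 2 and 3 would be
+// described by a local_size_binding with constant_id = {1, 2, 3}, which
+// lets the runtime choose the local size at dispatch time rather than
+// baking it into the shader at compile time.
+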
+// Specialization constant binding information
+struct VulkanSpecializationConstant {
+    uint32_t constant_id = 0;
+    uint32_t type_size = 0;
+    const char *constant_name = nullptr;
+};
+
+// Shared memory allocation variable information
+struct VulkanSharedMemoryAllocation {
+    uint32_t constant_id = 0;  // specialization constant to override allocation array size (or zero if unused)
+    uint32_t type_size = 0;
+    uint32_t array_size = 0;
+    const char *variable_name = nullptr;
+};
+
+// Entry point metadata for shader modules
+struct VulkanShaderBinding {
+    const char *entry_point_name = nullptr;
+    VulkanDispatchData dispatch_data = {};
+    VkDescriptorPool descriptor_pool = {0};
+    VkDescriptorSet descriptor_set = {0};
+    VkPipeline compute_pipeline = {0};
+    uint32_t uniform_buffer_count = 0;
+    uint32_t storage_buffer_count = 0;
+    uint32_t specialization_constants_count = 0;
+    uint32_t shared_memory_allocations_count = 0;
+    VulkanSpecializationConstant *specialization_constants = nullptr;
+    VulkanSharedMemoryAllocation *shared_memory_allocations = nullptr;
+    uint32_t bindings_count = 0;
+    MemoryRegion *args_region = nullptr;
+};
+
+// Compilation cache for compiled shader modules
+struct VulkanCompilationCacheEntry {
+    VkShaderModule shader_module = {0};
+    VkDescriptorSetLayout *descriptor_set_layouts = nullptr;
+    VkPipelineLayout pipeline_layout = {0};
+    uint32_t shader_count = 0;
+    VulkanShaderBinding *shader_bindings = nullptr;
+};
+
+WEAK Halide::Internal::GPUCompilationCache<VkDevice, VulkanCompilationCacheEntry *> compilation_cache;
+
+// --------------------------------------------------------------------------
+
+namespace { // internalize
+
+// --------------------------------------------------------------------------
+
+int vk_create_command_pool(void *user_context, VulkanMemoryAllocator *allocator, uint32_t queue_index, VkCommandPool *command_pool) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_create_command_pool (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "queue_index: " << queue_index << ")\n";
+#endif
+
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to create command pool ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    VkCommandPoolCreateInfo command_pool_info =
+        {
+            VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,  // struct type
+            nullptr,                                     // pointer to struct extending this
+            VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,        // flags. Assume transient short-lived single-use command buffers
+            queue_index                                  // queue family index corresponding to the compute command queue
+        };
+
+    VkResult result = vkCreateCommandPool(allocator->current_device(), &command_pool_info, allocator->callbacks(), command_pool);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: Failed to create command pool!\n";
+        return halide_error_code_generic_error;
+    }
+    return halide_error_code_success;
+}
+
+int vk_destroy_command_pool(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_destroy_command_pool (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "command_pool: " << (void *)command_pool << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to destroy command pool ...
invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + vkDestroyCommandPool(allocator->current_device(), command_pool, allocator->callbacks()); + return halide_error_code_success; +} + +// -- + +int vk_create_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool, VkCommandBuffer *command_buffer) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_create_command_buffer (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "command_pool: " << (void *)command_pool << ")\n"; +#endif + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to create command buffer ... invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + VkCommandBufferAllocateInfo command_buffer_info = + { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // struct type + nullptr, // pointer to struct extending this + command_pool, // command pool for allocation + VK_COMMAND_BUFFER_LEVEL_PRIMARY, // command buffer level + 1 // number to allocate + }; + + VkResult result = vkAllocateCommandBuffers(allocator->current_device(), &command_buffer_info, command_buffer); + if (result != VK_SUCCESS) { + error(user_context) << "Vulkan: Failed to allocate command buffers!\n"; + return halide_error_code_generic_error; + } + return halide_error_code_success; +} + +int vk_destroy_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool, VkCommandBuffer command_buffer) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_destroy_command_buffer (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "command_pool: " << (void *)command_pool << ", " + << "command_buffer: " << (void *)command_buffer << ")\n"; +#endif + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to destroy command buffer ... 
invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    vkFreeCommandBuffers(allocator->current_device(), command_pool, 1, &command_buffer);
+    return halide_error_code_success;
+}
+
+int vk_fill_command_buffer_with_dispatch_call(void *user_context,
+                                              VkDevice device,
+                                              VkCommandBuffer command_buffer,
+                                              VkPipeline compute_pipeline,
+                                              VkPipelineLayout pipeline_layout,
+                                              VkDescriptorSet descriptor_set,
+                                              uint32_t descriptor_set_index,
+                                              int blocksX, int blocksY, int blocksZ) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_fill_command_buffer_with_dispatch_call (user_context: " << user_context << ", "
+        << "device: " << (void *)device << ", "
+        << "command_buffer: " << (void *)command_buffer << ", "
+        << "pipeline_layout: " << (void *)pipeline_layout << ", "
+        << "descriptor_set: " << (void *)descriptor_set << ", "
+        << "descriptor_set_index: " << descriptor_set_index << ", "
+        << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n";
+#endif
+
+    VkCommandBufferBeginInfo command_buffer_begin_info = {
+        VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,  // struct type
+        nullptr,                                      // pointer to struct extending this
+        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,  // flags
+        nullptr                                       // pointer to parent command buffer
+    };
+
+    VkResult result = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "vkBeginCommandBuffer returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+
+    vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
+    vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
+                            descriptor_set_index, 1, &descriptor_set, 0, nullptr);
+    vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ);
+
+    result = vkEndCommandBuffer(command_buffer);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "vkEndCommandBuffer returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+
+    return halide_error_code_success;
+}
+
+int vk_submit_command_buffer(void *user_context, VkQueue queue, VkCommandBuffer command_buffer) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_submit_command_buffer (user_context: " << user_context << ", "
+        << "queue: " << (void *)queue << ", "
+        << "command_buffer: " << (void *)command_buffer << ")\n";
+#endif
+
+    VkSubmitInfo submit_info =
+        {
+            VK_STRUCTURE_TYPE_SUBMIT_INFO,  // struct type
+            nullptr,                        // pointer to struct extending this
+            0,                              // wait semaphore count
+            nullptr,                        // semaphores
+            nullptr,                        // pipeline stages where semaphore waits occur
+            1,                              // how many command buffers to execute
+            &command_buffer,                // the command buffers
+            0,                              // number of semaphores to signal
+            nullptr                         // the semaphores to signal
+        };
+
+    VkResult result = vkQueueSubmit(queue, 1, &submit_info, 0);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkQueueSubmit returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+    return halide_error_code_success;
+}
+
+// --
+
+bool vk_needs_scalar_uniform_buffer(void *user_context,
+                                    size_t arg_sizes[],
+                                    void *args[],
+                                    int8_t arg_is_buffer[]) {
+    int i = 0;
+    while (arg_sizes[i] > 0) {
+        if (!arg_is_buffer[i]) {
+            return true;
+        }
+        i++;
+    }
+    return false;
+}
+
+uint32_t vk_count_bindings_for_descriptor_set(void *user_context,
+                                              size_t arg_sizes[],
+                                              void *args[],
+                                              int8_t arg_is_buffer[]) {
+
+    // first binding is for passing scalar parameters in a uniform buffer (if necessary)
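+    // For example, a kernel taking (int32_t scale, input buffer, output buffer)
+    // would arrive here as arg_sizes = {4, 8, 8, 0} and arg_is_buffer = {0, 1, 1}
+    // (assuming 64-bit buffer handles), giving three bindings in total: one
+    // uniform buffer for the scalar, plus two storage buffers.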
buffer (if necessary)
+    uint32_t bindings_count = vk_needs_scalar_uniform_buffer(user_context, arg_sizes, args, arg_is_buffer);
+
+    int i = 0;
+    while (arg_sizes[i] > 0) {
+        if (arg_is_buffer[i]) {
+            bindings_count++;
+        }
+        i++;
+    }
+    return bindings_count;
+}
+
+// --
+
+int vk_create_descriptor_pool(void *user_context,
+                              VulkanMemoryAllocator *allocator,
+                              uint32_t uniform_buffer_count,
+                              uint32_t storage_buffer_count,
+                              VkDescriptorPool *descriptor_pool) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_create_descriptor_pool (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", "
+        << "storage_buffer_count: " << (uint32_t)storage_buffer_count << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to create descriptor pool ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    BlockStorage::Config pool_config;
+    pool_config.entry_size = sizeof(VkDescriptorPoolSize);
+    pool_config.minimum_capacity = (uniform_buffer_count ? 1 : 0) + (storage_buffer_count ? 1 : 0);
+    BlockStorage pool_sizes(user_context, pool_config);
+
+    // First binding is reserved for passing scalar parameters as a uniform buffer
+    if (uniform_buffer_count > 0) {
+        VkDescriptorPoolSize uniform_buffer_size = {
+            VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,  // descriptor type
+            uniform_buffer_count                // all kernel args are packed into uniform buffers
+        };
+        pool_sizes.append(user_context, &uniform_buffer_size);
+    }
+
+    if (storage_buffer_count > 0) {
+        VkDescriptorPoolSize storage_buffer_size = {
+            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,  // descriptor type
+            storage_buffer_count                // all halide buffers are passed as storage buffers
+        };
+        pool_sizes.append(user_context, &storage_buffer_size);
+    }
+
+    VkDescriptorPoolCreateInfo descriptor_pool_info = {
+        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,   // struct type
+        nullptr,                                         // pointer to struct extending this
+        0,                                               // flags
+        1,                                               // this pool will only be used for creating one descriptor set!
+        (uint32_t)pool_sizes.size(),                     // pool size count
+        (const VkDescriptorPoolSize *)pool_sizes.data()  // ptr to descriptor pool sizes
+    };
+
+    VkResult result = vkCreateDescriptorPool(allocator->current_device(), &descriptor_pool_info, allocator->callbacks(), descriptor_pool);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: Failed to create descriptor pool! vkCreateDescriptorPool returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+    return halide_error_code_success;
+}
+
+int vk_destroy_descriptor_pool(void *user_context,
+                               VulkanMemoryAllocator *allocator,
+                               VkDescriptorPool descriptor_pool) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_destroy_descriptor_pool (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "descriptor_pool: " << (void *)descriptor_pool << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to destroy descriptor pool ... 
invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + vkDestroyDescriptorPool(allocator->current_device(), descriptor_pool, allocator->callbacks()); + return halide_error_code_success; +} + +// -- + +int vk_create_descriptor_set_layout(void *user_context, + VulkanMemoryAllocator *allocator, + uint32_t uniform_buffer_count, + uint32_t storage_buffer_count, + VkDescriptorSetLayout *layout) { + +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_create_descriptor_set_layout (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "uniform_buffer_count: " << uniform_buffer_count << ", " + << "storage_buffer_count: " << storage_buffer_count << ", " + << "layout: " << (void *)layout << ")\n"; +#endif + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to create descriptor set layout ... invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + BlockStorage::Config layout_config; + layout_config.entry_size = sizeof(VkDescriptorSetLayoutBinding); + layout_config.minimum_capacity = uniform_buffer_count + storage_buffer_count; + BlockStorage layout_bindings(user_context, layout_config); + + // add all uniform buffers first + for (uint32_t n = 0; n < uniform_buffer_count; ++n) { + VkDescriptorSetLayoutBinding uniform_buffer_layout = { + (uint32_t)layout_bindings.size(), // binding index + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, // descriptor type + 1, // descriptor count + VK_SHADER_STAGE_COMPUTE_BIT, // stage flags + nullptr // immutable samplers + }; + +#ifdef DEBUG_RUNTIME + debug(user_context) + << " [" << (uint32_t)layout_bindings.size() << "] : UNIFORM_BUFFER\n"; +#endif + + layout_bindings.append(user_context, &uniform_buffer_layout); + } + + // Add all other storage buffers + for (uint32_t n = 0; n < storage_buffer_count; ++n) { + + // halide buffers will be passed as STORAGE_BUFFERS + VkDescriptorSetLayoutBinding storage_buffer_layout = { + (uint32_t)layout_bindings.size(), // binding index + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptor type + 1, // descriptor count + VK_SHADER_STAGE_COMPUTE_BIT, // stage flags + nullptr // immutable samplers + }; +#ifdef DEBUG_RUNTIME + debug(user_context) + << " [" << (uint32_t)layout_bindings.size() << "] : STORAGE_BUFFER\n"; +#endif + + layout_bindings.append(user_context, &storage_buffer_layout); + } + + // Create the LayoutInfo struct + VkDescriptorSetLayoutCreateInfo layout_info = { + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, // structure type + nullptr, // pointer to a struct extending this info + 0, // flags + (uint32_t)layout_bindings.size(), // binding count + (VkDescriptorSetLayoutBinding *)layout_bindings.data() // pointer to layout bindings array + }; + + // Create the descriptor set layout + VkResult result = vkCreateDescriptorSetLayout(allocator->current_device(), &layout_info, allocator->callbacks(), layout); + if (result != VK_SUCCESS) { + error(user_context) << "vkCreateDescriptorSetLayout returned " << vk_get_error_name(result) << "\n"; + return halide_error_code_generic_error; + } + + return halide_error_code_success; +} + +int vk_destroy_descriptor_set_layout(void *user_context, + VulkanMemoryAllocator *allocator, + VkDescriptorSetLayout descriptor_set_layout) { + +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_destroy_descriptor_set_layout (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "layout: " << (void *)descriptor_set_layout << ")\n"; +#endif + if (allocator == 
nullptr) {
+        error(user_context) << "Vulkan: Failed to destroy descriptor set layout ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+    vkDestroyDescriptorSetLayout(allocator->current_device(), descriptor_set_layout, allocator->callbacks());
+    return halide_error_code_success;
+}
+
+// --
+
+int vk_create_descriptor_set(void *user_context,
+                             VulkanMemoryAllocator *allocator,
+                             VkDescriptorSetLayout descriptor_set_layout,
+                             VkDescriptorPool descriptor_pool,
+                             VkDescriptorSet *descriptor_set) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_create_descriptor_set (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "descriptor_set_layout: " << (void *)descriptor_set_layout << ", "
+        << "descriptor_pool: " << (void *)descriptor_pool << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to create descriptor set ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    VkDescriptorSetAllocateInfo descriptor_set_info =
+        {
+            VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,  // struct type
+            nullptr,                                         // pointer to struct extending this
+            descriptor_pool,                                 // pool from which to allocate sets
+            1,                                               // number of descriptor sets
+            &descriptor_set_layout                           // pointer to array of descriptor set layouts
+        };
+
+    VkResult result = vkAllocateDescriptorSets(allocator->current_device(), &descriptor_set_info, descriptor_set);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkAllocateDescriptorSets returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+
+    return halide_error_code_success;
+}
+
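+// For illustration: a kernel with two scalar args and two buffer args gets
+// three descriptor writes from vk_update_descriptor_set() below ...
+//
+//   binding 0 : UNIFORM_BUFFER  (all scalars packed into one args buffer)
+//   binding 1 : STORAGE_BUFFER  (first halide_buffer_t)
+//   binding 2 : STORAGE_BUFFER  (second halide_buffer_t)
+//
+// ... matching the layout created by vk_create_descriptor_set_layout() above.
+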
+int vk_update_descriptor_set(void *user_context,
+                             VulkanMemoryAllocator *allocator,
+                             VkBuffer *scalar_args_buffer,
+                             size_t uniform_buffer_count,
+                             size_t storage_buffer_count,
+                             size_t arg_sizes[],
+                             void *args[],
+                             int8_t arg_is_buffer[],
+                             VkDescriptorSet descriptor_set) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_update_descriptor_set (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "scalar_args_buffer: " << (void *)scalar_args_buffer << ", "
+        << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", "
+        << "storage_buffer_count: " << (uint32_t)storage_buffer_count << ", "
+        << "descriptor_set: " << (void *)descriptor_set << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to update descriptor set ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    BlockStorage::Config dbi_config;
+    dbi_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
+    dbi_config.entry_size = sizeof(VkDescriptorBufferInfo);
+    BlockStorage descriptor_buffer_info(user_context, dbi_config);
+
+    BlockStorage::Config wds_config;
+    wds_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
+    wds_config.entry_size = sizeof(VkWriteDescriptorSet);
+    BlockStorage write_descriptor_set(user_context, wds_config);
+
+    // First binding will be the scalar args buffer (if needed) passed as a UNIFORM BUFFER
+    VkDescriptorBufferInfo *scalar_args_entry = nullptr;
+    if (scalar_args_buffer != nullptr) {
+        VkDescriptorBufferInfo scalar_args_descriptor_buffer_info = {
+            *scalar_args_buffer,  // the buffer
+            0,                    // offset
+            VK_WHOLE_SIZE         // range
+        };
+        descriptor_buffer_info.append(user_context, &scalar_args_descriptor_buffer_info);
+        scalar_args_entry = (VkDescriptorBufferInfo *)descriptor_buffer_info.back();
+
+#ifdef DEBUG_RUNTIME
+        debug(user_context) << " [" << (uint32_t)write_descriptor_set.size() << "] UNIFORM_BUFFER : "
+                            << "buffer=" << (void *)scalar_args_buffer << " "
+                            << "offset=" << (uint32_t)(0) << " "
+                            << "size=VK_WHOLE_SIZE\n";
+#endif
+        VkWriteDescriptorSet uniform_buffer_write_descriptor_set = {
+            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,  // struct type
+            nullptr,                                 // pointer to struct extending this
+            descriptor_set,                          // descriptor set to update
+            0,                                       // binding slot
+            0,                                       // array elem
+            1,                                       // num to update
+            VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,       // descriptor type
+            nullptr,                                 // for images
+            scalar_args_entry,                       // info for buffer
+            nullptr                                  // for texel buffers
+        };
+        write_descriptor_set.append(user_context, &uniform_buffer_write_descriptor_set);
+    }
+
+    // Add all the other device buffers as STORAGE BUFFERs
+    for (size_t i = 0; arg_sizes[i] > 0; i++) {
+        if (arg_is_buffer[i]) {
+
+            // get the allocated region for the buffer
+            MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(((halide_buffer_t *)args[i])->device);
+            MemoryRegion *owner = allocator->owner_of(user_context, device_region);
+
+            // retrieve the buffer from the region
+            VkBuffer *device_buffer = reinterpret_cast<VkBuffer *>(owner->handle);
+            if (device_buffer == nullptr) {
+                error(user_context) << "Vulkan: Failed to retrieve buffer for device memory!\n";
+                return halide_error_code_internal_error;
+            }
+
+            VkDeviceSize range_offset = device_region->range.head_offset;
+            VkDeviceSize range_size = device_region->size - device_region->range.head_offset - device_region->range.tail_offset;
+            halide_abort_if_false(user_context, (device_region->size - device_region->range.head_offset - device_region->range.tail_offset) > 0);
+            VkDescriptorBufferInfo device_buffer_info = {
+                *device_buffer,  // the buffer
+                range_offset,    // range offset
+                range_size       // range size
+            };
+            descriptor_buffer_info.append(user_context, &device_buffer_info);
+            VkDescriptorBufferInfo *device_buffer_entry = (VkDescriptorBufferInfo *)descriptor_buffer_info.back();
+
+#ifdef DEBUG_RUNTIME
+            debug(user_context) << " [" << (uint32_t)write_descriptor_set.size() << "] STORAGE_BUFFER : "
+                                << "region=" << (void *)device_region << " "
+                                << "buffer=" << (void *)device_buffer << " "
+                                << "offset=" << (uint32_t)(range_offset) << " "
+                                << "size=" << (uint32_t)(range_size) << "\n";
+#endif
+
+            VkWriteDescriptorSet storage_buffer_write_descriptor_set = {
+                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,  // struct type
+                nullptr,                                 // pointer to struct extending this
+                
descriptor_set,                          // descriptor set to update
+                (uint32_t)write_descriptor_set.size(),   // binding slot
+                0,                                       // array elem
+                1,                                       // num to update
+                VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,       // descriptor type
+                nullptr,                                 // for images
+                device_buffer_entry,                     // info for buffer
+                nullptr                                  // for texel buffers
+            };
+            write_descriptor_set.append(user_context, &storage_buffer_write_descriptor_set);
+        }
+    }
+
+    // issue the update call to populate the descriptor set
+    vkUpdateDescriptorSets(allocator->current_device(), (uint32_t)write_descriptor_set.size(), (const VkWriteDescriptorSet *)write_descriptor_set.data(), 0, nullptr);
+    return halide_error_code_success;
+}
+
+// --
+
+size_t vk_estimate_scalar_uniform_buffer_size(void *user_context,
+                                              size_t arg_sizes[],
+                                              void *args[],
+                                              int8_t arg_is_buffer[]) {
+    int i = 0;
+    int scalar_uniform_buffer_size = 0;
+    while (arg_sizes[i] > 0) {
+        if (!arg_is_buffer[i]) {
+            scalar_uniform_buffer_size += arg_sizes[i];
+        }
+        i++;
+    }
+    return scalar_uniform_buffer_size;
+}
+
+MemoryRegion *vk_create_scalar_uniform_buffer(void *user_context,
+                                              VulkanMemoryAllocator *allocator,
+                                              size_t scalar_buffer_size) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_create_scalar_uniform_buffer (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "scalar_buffer_size: " << (uint32_t)scalar_buffer_size << ")\n";
+#endif
+
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to create scalar uniform buffer ... invalid allocator pointer!\n";
+        return nullptr;
+    }
+
+    MemoryRequest request = {0};
+    request.size = scalar_buffer_size;
+    request.properties.usage = MemoryUsage::UniformStorage;
+    request.properties.caching = MemoryCaching::UncachedCoherent;
+    request.properties.visibility = MemoryVisibility::HostToDevice;
+
+    // allocate a new region
+    MemoryRegion *region = allocator->reserve(user_context, request);
+    if ((region == nullptr) || (region->handle == nullptr)) {
+        error(user_context) << "Vulkan: Failed to create scalar uniform buffer ... unable to allocate device memory!\n";
+        return nullptr;
+    }
+
+    // return the allocated region for the uniform buffer
+    return region;
+}
+
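+// For illustration: three scalar args of sizes (4, 4, 8) bytes are packed
+// back-to-back in declaration order at offsets 0, 4 and 8 of the uniform
+// buffer by the copy loop in vk_update_scalar_uniform_buffer() below.
+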
+int vk_update_scalar_uniform_buffer(void *user_context,
+                                    VulkanMemoryAllocator *allocator,
+                                    MemoryRegion *region,
+                                    size_t arg_sizes[],
+                                    void *args[],
+                                    int8_t arg_is_buffer[]) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_update_scalar_uniform_buffer (user_context: " << user_context << ", "
+        << "region: " << (void *)region << ")\n";
+#endif
+
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to update scalar uniform buffer ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    if ((region == nullptr) || (region->handle == nullptr)) {
+        error(user_context) << "Vulkan: Failed to update scalar uniform buffer ... invalid memory region!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // map the region to a host ptr
+    uint8_t *host_ptr = (uint8_t *)allocator->map(user_context, region);
+    if (host_ptr == nullptr) {
+        error(user_context) << "Vulkan: Failed to update scalar uniform buffer ... unable to map host pointer to device memory!\n";
+        return halide_error_code_internal_error;
+    }
+
+    // copy to the (host-visible/coherent) scalar uniform buffer
+    size_t arg_offset = 0;
+    for (size_t i = 0; arg_sizes[i] > 0; i++) {
+        if (!arg_is_buffer[i]) {
+            memcpy(host_ptr + arg_offset, args[i], arg_sizes[i]);
+            arg_offset += arg_sizes[i];
+        }
+    }
+
+    // unmap the pointer to the buffer for the region
+    allocator->unmap(user_context, region);
+    return halide_error_code_success;
+}
+
+int vk_destroy_scalar_uniform_buffer(void *user_context, VulkanMemoryAllocator *allocator,
+                                     MemoryRegion *scalar_args_region) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_destroy_scalar_uniform_buffer (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "scalar_args_region: " << (void *)scalar_args_region << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to destroy scalar uniform buffer ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    if (!scalar_args_region) {
+        return halide_error_code_success;
+    }
+
+    int error_code = halide_error_code_success;
+    if (halide_can_reuse_device_allocations(user_context)) {
+        error_code = allocator->release(user_context, scalar_args_region);
+    } else {
+        error_code = allocator->reclaim(user_context, scalar_args_region);
+    }
+    return error_code;
+}
+
+// --
+
+int vk_create_pipeline_layout(void *user_context,
+                              VulkanMemoryAllocator *allocator,
+                              uint32_t descriptor_set_count,
+                              VkDescriptorSetLayout *descriptor_set_layouts,
+                              VkPipelineLayout *pipeline_layout) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_create_pipeline_layout (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "descriptor_set_count: " << descriptor_set_count << ", "
+        << "descriptor_set_layouts: " << (void *)descriptor_set_layouts << ", "
+        << "pipeline_layout: " << (void *)pipeline_layout << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to create pipeline layout ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    VkPipelineLayoutCreateInfo pipeline_layout_info = {
+        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,  // structure type
+        nullptr,                                        // pointer to a structure extending this
+        0,                                              // flags
+        descriptor_set_count,                           // number of descriptor sets
+        descriptor_set_layouts,                         // pointer to the descriptor sets
+        0,                                              // number of push constant ranges
+        nullptr                                         // pointer to push constant range structs
+    };
+
+    VkResult result = vkCreatePipelineLayout(allocator->current_device(), &pipeline_layout_info, allocator->callbacks(), pipeline_layout);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: vkCreatePipelineLayout returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+    return halide_error_code_success;
+}
+
+int vk_destroy_pipeline_layout(void *user_context,
+                               VulkanMemoryAllocator *allocator,
+                               VkPipelineLayout pipeline_layout) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_destroy_pipeline_layout (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "pipeline_layout: " << (void *)pipeline_layout << ")\n";
+#endif
+
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to destroy pipeline layout ... 
invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    vkDestroyPipelineLayout(allocator->current_device(), pipeline_layout, allocator->callbacks());
+    return halide_error_code_success;
+}
+
+// --
+
+int vk_create_compute_pipeline(void *user_context,
+                               VulkanMemoryAllocator *allocator,
+                               const char *pipeline_name,
+                               VkShaderModule shader_module,
+                               VkPipelineLayout pipeline_layout,
+                               VkSpecializationInfo *specialization_info,
+                               VkPipeline *compute_pipeline) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_create_compute_pipeline (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "shader_module: " << (void *)shader_module << ", "
+        << "pipeline_layout: " << (void *)pipeline_layout << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to create compute pipeline ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    VkComputePipelineCreateInfo compute_pipeline_info =
+        {
+            VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,  // structure type
+            nullptr,                                         // pointer to a structure extending this
+            0,                                               // flags
+            // VkPipelineShaderStageCreateInfo
+            {
+                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,  // structure type
+                nullptr,                                              // pointer to a structure extending this
+                0,                                                    // flags
+                VK_SHADER_STAGE_COMPUTE_BIT,                          // compute stage shader
+                shader_module,                                        // shader module
+                pipeline_name,                                        // entry point name
+                specialization_info,                                  // pointer to VkSpecializationInfo struct
+            },
+            pipeline_layout,  // pipeline layout
+            0,                // base pipeline handle for derived pipeline
+            0                 // base pipeline index for derived pipeline
+        };
+
+    VkResult result = vkCreateComputePipelines(allocator->current_device(), 0, 1, &compute_pipeline_info, allocator->callbacks(), compute_pipeline);
+    if (result != VK_SUCCESS) {
+        error(user_context) << "Vulkan: Failed to create compute pipeline! vkCreateComputePipelines returned " << vk_get_error_name(result) << "\n";
+        return halide_error_code_generic_error;
+    }
+
+    return halide_error_code_success;
+}
+
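+// For illustration: when a dispatch needs to override specialization constants
+// (a dynamic shared memory array size and/or the three workgroup dimensions),
+// vk_setup_compute_pipeline() below rebuilds the pipeline with a
+// VkSpecializationInfo holding up to four uint32_t values, e.g. ...
+//
+//   map entries : { constantID, offset = 4 * n, size = 4 } for each override
+//   pData       : the resolved uint32_t values, in the same order
+//
+// ... otherwise a fixed pipeline is created once and cached in the shader binding.
+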
+int vk_setup_compute_pipeline(void *user_context,
+                              VulkanMemoryAllocator *allocator,
+                              VulkanShaderBinding *shader_bindings,
+                              VulkanDispatchData *dispatch_data,
+                              VkShaderModule shader_module,
+                              VkPipelineLayout pipeline_layout,
+                              VkPipeline *compute_pipeline) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_setup_compute_pipeline (user_context: " << user_context << ", "
+        << "entry_point_name: '" << shader_bindings->entry_point_name << "', "
+        << "allocator: " << (void *)allocator << ", "
+        << "shader_bindings: " << (void *)shader_bindings << ", "
+        << "dispatch_data: " << (void *)dispatch_data << ", "
+        << "shader_module: " << (void *)shader_module << ", "
+        << "pipeline_layout: " << (void *)pipeline_layout << ")\n";
+#endif
+
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to setup compute pipeline ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    if (shader_bindings == nullptr) {
+        error(user_context) << "Vulkan: Failed to setup compute pipeline ... invalid shader bindings!\n";
+        return halide_error_code_generic_error;
+    }
+
+    if (dispatch_data == nullptr) {
+        error(user_context) << "Vulkan: Failed to setup compute pipeline ... invalid dispatch data!\n";
+        return halide_error_code_generic_error;
+    }
+
+    VkResult result = VK_SUCCESS;
+    const char *entry_point_name = shader_bindings->entry_point_name;
+    if (entry_point_name == nullptr) {
+        error(user_context) << "Vulkan: Failed to setup compute pipeline ... missing entry point name!\n";
+        return halide_error_code_generic_error;
+    }
+
+    uint32_t dispatch_constant_index = 0;
+    uint32_t dispatch_constant_ids[4] = {0, 0, 0, 0};
+    uint32_t dispatch_constant_values[4] = {0, 0, 0, 0};
+
+    // locate the mapping for overriding any dynamic shared memory allocation sizes
+    if (shader_bindings->shared_memory_allocations_count && dispatch_data->shared_mem_bytes) {
+
+        uint32_t shared_mem_constant_id = 0;
+        uint32_t static_shared_mem_bytes = 0;
+        uint32_t shared_mem_type_size = 0;
+
+        for (uint32_t sm = 0; sm < shader_bindings->shared_memory_allocations_count; sm++) {
+            VulkanSharedMemoryAllocation *allocation = &(shader_bindings->shared_memory_allocations[sm]);
+            if (allocation->constant_id == 0) {
+                // static fixed-size allocation
+                static_shared_mem_bytes += allocation->type_size * allocation->array_size;
+            } else {
+                // dynamic allocation
+                if (shared_mem_constant_id > 0) {
+                    error(user_context) << "Vulkan: Multiple dynamic shared memory allocations found! Only one is supported!\n";
+                    result = VK_ERROR_TOO_MANY_OBJECTS;
+                    break;
+                }
+                shared_mem_constant_id = allocation->constant_id;
+                shared_mem_type_size = allocation->type_size;
+            }
+        }
+        uint32_t shared_mem_bytes_avail = (dispatch_data->shared_mem_bytes - static_shared_mem_bytes);
+        debug(user_context) << " pipeline uses " << static_shared_mem_bytes << " bytes of static shared memory\n";
+        debug(user_context) << " dispatch requests " << dispatch_data->shared_mem_bytes << " bytes of shared memory\n";
+        debug(user_context) << " dynamic shared memory " << shared_mem_bytes_avail << " bytes available\n";
+
+        // setup the dynamic array size
+        if ((shared_mem_constant_id > 0) && (shared_mem_bytes_avail > 0)) {
+            uint32_t dynamic_array_size = (uint32_t)shared_mem_bytes_avail / shared_mem_type_size;
+            debug(user_context) << " setting shared memory to " << (uint32_t)dynamic_array_size << " elements "
+                                << "(or " << (uint32_t)shared_mem_bytes_avail << " bytes)\n";
+
+            // save the shared mem specialization constant in the first slot
+            dispatch_constant_ids[dispatch_constant_index] = shared_mem_constant_id;
+            dispatch_constant_values[dispatch_constant_index] = dynamic_array_size;
+            dispatch_constant_index++;
+        }
+    }
+
+    // locate the mapping for overriding any dynamic workgroup local sizes
+    if (shader_bindings->dispatch_data.local_size_binding.constant_id[0] != 0) {
+        for (uint32_t dim = 0; dim < 3; dim++) {
+            dispatch_constant_ids[dispatch_constant_index] = shader_bindings->dispatch_data.local_size_binding.constant_id[dim];
+            dispatch_constant_values[dispatch_constant_index] = dispatch_data->local_size[dim];
+            dispatch_constant_index++;
+        }
+    }
+
+    // verify the specialization constants actually exist
+    for (uint32_t dc = 0; dc < dispatch_constant_index; dc++) {
+        const uint32_t invalid_index = uint32_t(-1);
+        uint32_t found_index = invalid_index;
+        for (uint32_t sc = 0; sc < shader_bindings->specialization_constants_count; sc++) {
+            if (shader_bindings->specialization_constants[sc].constant_id == dispatch_constant_ids[dc]) {
+                debug(user_context) << " binding specialization constant [" << dispatch_constant_ids[dc] << "] "
+                                    << "'" << shader_bindings->specialization_constants[sc].constant_name << "' "
+                                    << " => " << 
dispatch_constant_values[dc] << "\n"; + found_index = sc; + break; + } + } + if (found_index == invalid_index) { + error(user_context) << "Vulkan: Failed to locate dispatch constant index for shader binding!\n"; + result = VK_ERROR_INITIALIZATION_FAILED; + } + } + + // don't even attempt to create the pipeline layout if we encountered errors in the shader binding + if (result != VK_SUCCESS) { + error(user_context) << "Vulkan: Failed to decode shader bindings! " << vk_get_error_name(result) << "\n"; + return halide_error_code_generic_error; + } + + // Prepare specialization mapping for all dispatch constants + uint32_t dispatch_constant_count = 0; + VkSpecializationMapEntry specialization_map_entries[4]; + memset(specialization_map_entries, 0, 4 * sizeof(VkSpecializationMapEntry)); + for (uint32_t dc = 0; dc < dispatch_constant_index && dc < 4; dc++) { + specialization_map_entries[dc].constantID = dispatch_constant_ids[dc]; + specialization_map_entries[dc].size = sizeof(uint32_t); + specialization_map_entries[dc].offset = dc * sizeof(uint32_t); + dispatch_constant_count++; + } + + if (dispatch_constant_count > 0) { + + // Prepare specialization info block for the shader stage + VkSpecializationInfo specialization_info{}; + specialization_info.dataSize = dispatch_constant_count * sizeof(uint32_t); + specialization_info.mapEntryCount = dispatch_constant_count; + specialization_info.pMapEntries = specialization_map_entries; + specialization_info.pData = dispatch_constant_values; + + // Recreate the pipeline with the requested shared memory allocation + if (shader_bindings->compute_pipeline) { + int error_code = vk_destroy_compute_pipeline(user_context, allocator, shader_bindings->compute_pipeline); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to destroy compute pipeline!\n"; + return halide_error_code_generic_error; + } + shader_bindings->compute_pipeline = {0}; + } + + int error_code = vk_create_compute_pipeline(user_context, allocator, entry_point_name, shader_module, pipeline_layout, &specialization_info, &(shader_bindings->compute_pipeline)); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to create compute pipeline!\n"; + return error_code; + } + + } else { + + // Construct and re-use the fixed pipeline + if (shader_bindings->compute_pipeline == 0) { + int error_code = vk_create_compute_pipeline(user_context, allocator, entry_point_name, shader_module, pipeline_layout, nullptr, &(shader_bindings->compute_pipeline)); + if (error_code != halide_error_code_success) { + error(user_context) << "Vulkan: Failed to create compute pipeline!\n"; + return error_code; + } + } + } + + return halide_error_code_success; +} + +int vk_destroy_compute_pipeline(void *user_context, + VulkanMemoryAllocator *allocator, + VkPipeline compute_pipeline) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_destroy_compute_pipeline (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "device: " << (void *)allocator->current_device() << ", " + << "compute_pipeline: " << (void *)compute_pipeline << ")\n"; +#endif + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to destroy compute pipeline ... 
invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    vkDestroyPipeline(allocator->current_device(), compute_pipeline, allocator->callbacks());
+    return halide_error_code_success;
+}
+
+// --------------------------------------------------------------------------
+
+VulkanShaderBinding *vk_decode_shader_bindings(void *user_context, VulkanMemoryAllocator *allocator, const uint32_t *module_ptr, uint32_t module_size) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_decode_shader_bindings (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "module_ptr: " << (void *)module_ptr << ", "
+        << "module_size: " << module_size << ")\n";
+
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to decode shader bindings ... invalid allocator pointer!\n";
+        return nullptr;
+    }
+
+    if ((module_ptr == nullptr) || (module_size < (2 * sizeof(uint32_t)))) {
+        error(user_context) << "Vulkan: Failed to decode shader bindings ... invalid module buffer!\n";
+        return nullptr;
+    }
+
+    // Decode the sidecar for the module that lists the descriptor sets
+    // corresponding to each entry point contained in the module.
+    //
+    // Construct a shader binding for each entry point that defines all
+    // the buffers, constants, shared memory, and workgroup sizes
+    // that are required for execution.
+    //
+    // Like the SPIR-V code module, each entry is one word (1x uint32_t).
+    // Variable length sections are prefixed with their length (ie number of entries).
+    //
+    // [0] Header word count (total length of header)
+    // [1] Number of descriptor sets
+    // ... For each descriptor set ...
+    // ... [0] Length of entry point name (padded to nearest word size)
+    // ....... [*] Entry point string data (padded with null chars)
+    // ... [1] Number of uniform buffers for this descriptor set
+    // ... [2] Number of storage buffers for this descriptor set
+    // ... [3] Number of specialization constants for this descriptor set
+    // ....... For each specialization constant ...
+    // ....... [0] Length of constant name string (padded to nearest word size)
+    // ........... [*] Constant name string data (padded with null chars)
+    // ....... [1] Constant id (as used in VkSpecializationMapEntry for binding)
+    // ....... [2] Size of data type (in bytes)
+    // ... [4] Number of shared memory allocations for this descriptor set
+    // ....... For each allocation ...
+    // ....... [0] Length of variable name string (padded to nearest word size)
+    // ........... [*] Variable name string data (padded with null chars)
+    // ....... [1] Constant id to use for overriding array size (zero if it is not bound to a specialization constant)
+    // ....... [2] Size of data type (in bytes)
+    // ....... [3] Size of array (ie element count)
+    // ... [5] Dynamic workgroup dimensions bound to specialization constants
+    // ....... [0] Constant id to use for local_size_x (zero if it was statically declared and not bound to a specialization constant)
+    // ....... [1] Constant id to use for local_size_y
+    // ....... [2] Constant id to use for local_size_z
+    //
+    // NOTE: See CodeGen_Vulkan_Dev::SPIRV_Emitter::encode_header() for the encoding
+    //
+    // Both vk_decode_shader_bindings() and vk_compile_shader_module() will
+    // need to be updated if the header encoding ever changes!
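+    //
+    // For illustration only (hypothetical values): a module with a single
+    // entry point "k_main" that uses 1 uniform buffer, 2 storage buffers,
+    // no specialization constants, no shared memory, and a statically
+    // declared workgroup size would encode a 12-word header:
+    //
+    //   [ 12, 1, 2, 'k_ma', 'in\0\0', 1, 2, 0, 0, 0, 0, 0 ]
+    //
+    // ie header word count, one descriptor set, a two-word padded name,
+    // the buffer and constant counts, and three zero constant ids for the
+    // dynamic workgroup dimensions.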
+ // + uint32_t module_entries = module_size / sizeof(uint32_t); + uint32_t idx = 1; // skip past the header_word_count + uint32_t shader_count = module_ptr[idx++]; + if (shader_count < 1) { + error(user_context) << "Vulkan: Failed to decode shader bindings ... no descriptors found!\n"; + return nullptr; // no descriptors + } + + // allocate an array of shader bindings (one for each entry point in the module) + VkSystemAllocationScope alloc_scope = VkSystemAllocationScope::VK_SYSTEM_ALLOCATION_SCOPE_OBJECT; + size_t shader_bindings_size = shader_count * sizeof(VulkanShaderBinding); + VulkanShaderBinding *shader_bindings = (VulkanShaderBinding *)vk_host_malloc(user_context, shader_bindings_size, 0, alloc_scope, allocator->callbacks()); + if (shader_bindings == nullptr) { + error(user_context) << "Vulkan: Failed to allocate shader_bindings! Out of memory!\n"; + return nullptr; + } + memset(shader_bindings, 0, shader_bindings_size); + + // decode and fill in the shader binding for each entry point + for (uint32_t n = 0; (n < shader_count) && (idx < module_entries); n++) { + halide_debug_assert(user_context, (idx + 8) < module_entries); // should be at least 8 entries + + // [0] Length of entry point name (padded to nearest word size) + uint32_t entry_point_name_length = module_ptr[idx++]; + + // [*] Entry point string data (padded with null chars) + const char *entry_point_name = (const char *)(module_ptr + idx); // NOTE: module owns string data + idx += entry_point_name_length; // skip past string data + + // [1] Number of uniform buffers for this descriptor set + uint32_t uniform_buffer_count = module_ptr[idx++]; + + // [2] Number of storage buffers for this descriptor set + uint32_t storage_buffer_count = module_ptr[idx++]; + + // [3] Number of specialization constants for this descriptor set + uint32_t specialization_constants_count = module_ptr[idx++]; + + // Decode all specialization constants + VulkanSpecializationConstant *specialization_constants = nullptr; + if (specialization_constants_count > 0) { + + // Allocate an array to store the decoded specialization constant data + size_t specialization_constants_size = specialization_constants_count * sizeof(VulkanSpecializationConstant); + specialization_constants = (VulkanSpecializationConstant *)vk_host_malloc(user_context, specialization_constants_size, 0, alloc_scope, allocator->callbacks()); + if (specialization_constants == nullptr) { + error(user_context) << "Vulkan: Failed to allocate specialization_constants! Out of memory!\n"; + return nullptr; + } + memset(specialization_constants, 0, specialization_constants_size); + + // For each specialization constant ... 
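+            // For illustration only (hypothetical values): a constant named
+            // "wg_size_x" (10 bytes with the null terminator, padded to 3 words)
+            // bound to constant_id 1 with a 4-byte type decodes from:
+            //
+            //   [ 3, 'wg_s', 'ize_', 'x\0\0\0', 1, 4 ]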
+            for (uint32_t sc = 0; sc < specialization_constants_count; sc++) {
+                halide_debug_assert(user_context, (idx + 4) < module_entries);  // should be at least 4 entries
+
+                // [0] Length of constant name string (padded to nearest word size)
+                uint32_t constant_name_length = module_ptr[idx++];
+
+                // [*] Constant name string data (padded with null chars)
+                const char *constant_name = (const char *)(module_ptr + idx);
+                specialization_constants[sc].constant_name = constant_name;  // NOTE: module owns string data
+                idx += constant_name_length;  // skip past string data
+
+                // [1] Constant id (as used in VkSpecializationMapEntry for binding)
+                specialization_constants[sc].constant_id = module_ptr[idx++];
+
+                // [2] Size of data type (in bytes)
+                specialization_constants[sc].type_size = module_ptr[idx++];
+            }
+        }
+
+        // [4] Number of shared memory allocations for this descriptor set
+        uint32_t shared_memory_allocations_count = module_ptr[idx++];
+
+        // Decode all shared memory allocations ...
+        VulkanSharedMemoryAllocation *shared_memory_allocations = nullptr;
+        if (shared_memory_allocations_count > 0) {
+
+            // Allocate an array to store the decoded shared memory allocation data
+            size_t shared_memory_allocations_size = shared_memory_allocations_count * sizeof(VulkanSharedMemoryAllocation);
+            shared_memory_allocations = (VulkanSharedMemoryAllocation *)vk_host_malloc(user_context, shared_memory_allocations_size, 0, alloc_scope, allocator->callbacks());
+            if (shared_memory_allocations == nullptr) {
+                error(user_context) << "Vulkan: Failed to allocate shared_memory_allocations! Out of memory!\n";
+                return nullptr;
+            }
+            memset(shared_memory_allocations, 0, shared_memory_allocations_size);
+
+            // For each shared memory allocation ...
+            for (uint32_t sm = 0; sm < shared_memory_allocations_count && (idx < module_entries); sm++) {
+                halide_debug_assert(user_context, (idx + 4) < module_entries);  // should be at least 4 entries
+
+                // [0] Length of variable name string (padded to nearest word size)
+                uint32_t variable_name_length = module_ptr[idx++];
+
+                // [*] Variable name string data (padded with null chars)
+                const char *variable_name = (const char *)(module_ptr + idx);
+                shared_memory_allocations[sm].variable_name = variable_name;  // NOTE: module owns string data
+                idx += variable_name_length;  // skip past string data
+
+                // [1] Constant id to use for overriding array size
+                shared_memory_allocations[sm].constant_id = module_ptr[idx++];
+
+                // [2] Size of data type (in bytes)
+                shared_memory_allocations[sm].type_size = module_ptr[idx++];
+
+                // [3] Size of array (ie element count)
+                shared_memory_allocations[sm].array_size = module_ptr[idx++];
+            }
+        }
+
+        // [5] Dynamic workgroup dimensions bound to specialization constants
+        halide_debug_assert(user_context, (idx + 3) < module_entries);  // should be at least 3 entries
+        for (uint32_t dim = 0; dim < 3 && (idx < module_entries); dim++) {
+            shader_bindings[n].dispatch_data.local_size_binding.constant_id[dim] = module_ptr[idx++];
+        }
+
+#ifdef DEBUG_RUNTIME
+
+        debug(user_context) << " [" << n << "] '" << (const char *)entry_point_name << "'\n";
+
+        debug(user_context) << " uniform_buffer_count=" << uniform_buffer_count << "\n"
+                            << " storage_buffer_count=" << storage_buffer_count << "\n";
+
+        debug(user_context) << " specialization_constants_count=" << specialization_constants_count << "\n";
+        for (uint32_t sc = 0; sc < specialization_constants_count; sc++) {
+            debug(user_context) << " [" << sc << "] "
+                                << "constant_name='" << (const char 
*)specialization_constants[sc].constant_name << "' " + << "constant_id=" << specialization_constants[sc].constant_id << " " + << "type_size=" << specialization_constants[sc].type_size << "\n"; + } + + debug(user_context) << " shared_memory_allocations_count=" << shared_memory_allocations_count << "\n"; + for (uint32_t sm = 0; sm < shared_memory_allocations_count; sm++) { + debug(user_context) << " [" << sm << "] " + << "variable_name='" << (const char *)shared_memory_allocations[sm].variable_name << "' " + << "constant_id=" << shared_memory_allocations[sm].constant_id << " " + << "type_size=" << shared_memory_allocations[sm].type_size << " " + << "array_size=" << shared_memory_allocations[sm].array_size << "\n"; + } + debug(user_context) << " local_size_binding=["; + for (uint32_t dim = 0; dim < 3 && (idx < module_entries); dim++) { + debug(user_context) << shader_bindings[n].dispatch_data.local_size_binding.constant_id[dim] << " "; + } + debug(user_context) << "]\n"; +#endif + shader_bindings[n].entry_point_name = entry_point_name; // NOTE: module owns string data + shader_bindings[n].uniform_buffer_count = uniform_buffer_count; + shader_bindings[n].storage_buffer_count = storage_buffer_count; + shader_bindings[n].specialization_constants_count = specialization_constants_count; + shader_bindings[n].specialization_constants = specialization_constants; + shader_bindings[n].shared_memory_allocations_count = shared_memory_allocations_count; + shader_bindings[n].shared_memory_allocations = shared_memory_allocations; + } + +#ifdef DEBUG_RUNTIME + uint64_t t_after = halide_current_time_ns(user_context); + debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n"; +#endif + + return shader_bindings; +} + +VulkanCompilationCacheEntry *vk_compile_shader_module(void *user_context, VulkanMemoryAllocator *allocator, + const char *ptr, int size) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_compile_shader_module (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "device: " << (void *)allocator->current_device() << ", " + << "module: " << (void *)ptr << ", " + << "size: " << size << ")\n"; + + uint64_t t_before = halide_current_time_ns(user_context); +#endif + + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to compile shader modules ... invalid allocator pointer!\n"; + return nullptr; + } + + if ((ptr == nullptr) || (size <= 0)) { + error(user_context) << "Vulkan: Failed to compile shader modules ... 
invalid program source buffer!\n";
+        return nullptr;
+    }
+
+    const uint32_t *module_ptr = (const uint32_t *)ptr;
+    const uint32_t module_size = (const uint32_t)size;
+
+    halide_debug_assert(user_context, module_ptr != nullptr);
+    halide_debug_assert(user_context, module_size >= (2 * sizeof(uint32_t)));
+
+    uint32_t header_word_count = module_ptr[0];
+    uint32_t shader_count = module_ptr[1];
+    uint32_t header_size = header_word_count * sizeof(uint32_t);
+
+    // skip past the preamble header to the start of the SPIR-V binary
+    const uint32_t *binary_ptr = (module_ptr + header_word_count);
+    size_t binary_size = (size - header_size);
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context) << "Vulkan: Decoding module ("
+                        << "module_ptr: " << (void *)module_ptr << ", "
+                        << "header_word_count: " << header_word_count << ", "
+                        << "header_size: " << header_size << ", "
+                        << "binary_ptr: " << (void *)binary_ptr << ", "
+                        << "binary_size: " << (uint32_t)binary_size << ")\n";
+#endif
+
+    VkShaderModuleCreateInfo shader_info = {
+        VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+        nullptr,                      // pointer to structure extending this
+        0,                            // flags (currently unused)
+        (size_t)binary_size,          // code size in bytes
+        (const uint32_t *)binary_ptr  // source
+    };
+
+    VkSystemAllocationScope alloc_scope = VkSystemAllocationScope::VK_SYSTEM_ALLOCATION_SCOPE_OBJECT;
+    VulkanCompilationCacheEntry *cache_entry = (VulkanCompilationCacheEntry *)vk_host_malloc(user_context, sizeof(VulkanCompilationCacheEntry), 0, alloc_scope, allocator->callbacks());
+    if (cache_entry == nullptr) {
+        error(user_context) << "Vulkan: Failed to allocate compilation cache entry! Out of memory!\n";
+        return nullptr;
+    }
+    memset(cache_entry, 0, sizeof(VulkanCompilationCacheEntry));
+
+    // decode the entry point data and extract the shader bindings
+    VulkanShaderBinding *decoded_bindings = vk_decode_shader_bindings(user_context, allocator, module_ptr, module_size);
+    if (decoded_bindings == nullptr) {
+        error(user_context) << "Vulkan: Failed to decode shader bindings!\n";
+        vk_host_free(user_context, cache_entry, allocator->callbacks());
+        return nullptr;
+    }
+
+    // save the shader bindings in the cache entry
+    cache_entry->shader_bindings = decoded_bindings;
+    cache_entry->shader_count = shader_count;
+
+    VkResult result = vkCreateShaderModule(allocator->current_device(), &shader_info, allocator->callbacks(), &cache_entry->shader_module);
+    if ((result != VK_SUCCESS)) {
+        error(user_context) << "Vulkan: vkCreateShaderModule Failed! Error returned: " << vk_get_error_name(result) << "\n";
+        vk_host_free(user_context, cache_entry->shader_bindings, allocator->callbacks());
+        vk_host_free(user_context, cache_entry, allocator->callbacks());
+        return nullptr;
+    }
+
+    // allocate an array for storing the descriptor set layouts
+    if (cache_entry->shader_count) {
+        cache_entry->descriptor_set_layouts = (VkDescriptorSetLayout *)vk_host_malloc(user_context, cache_entry->shader_count * sizeof(VkDescriptorSetLayout), 0, alloc_scope, allocator->callbacks());
+        if (cache_entry->descriptor_set_layouts == nullptr) {
+            error(user_context) << "Vulkan: Failed to allocate descriptor set layouts for cache entry! 
Out of memory!\n"; + return nullptr; + } + memset(cache_entry->descriptor_set_layouts, 0, cache_entry->shader_count * sizeof(VkDescriptorSetLayout)); + } + +#ifdef DEBUG_RUNTIME + uint64_t t_after = halide_current_time_ns(user_context); + debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n"; +#endif + + return cache_entry; +} + +int vk_destroy_shader_modules(void *user_context, VulkanMemoryAllocator *allocator) { + +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_destroy_shader_modules (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "device: " << (void *)allocator->current_device() << ")\n"; + + uint64_t t_before = halide_current_time_ns(user_context); +#endif + + if (allocator == nullptr) { + error(user_context) << "Vulkan: Failed to destroy shader modules ... invalid allocator pointer!\n"; + return halide_error_code_generic_error; + } + + // Functor to match compilation cache destruction call with scoped params + struct DestroyShaderModule { + void *user_context = nullptr; + VulkanMemoryAllocator *allocator = nullptr; + + DestroyShaderModule(void *ctx, VulkanMemoryAllocator *allocator) + : user_context(ctx), allocator(allocator) { + } + + void operator()(VulkanCompilationCacheEntry *cache_entry) { + if (cache_entry != nullptr) { + if (cache_entry->descriptor_set_layouts) { + for (uint32_t n = 0; n < cache_entry->shader_count; n++) { + debug(user_context) << " destroying descriptor set layout [" << n << "] " << cache_entry->shader_bindings[n].entry_point_name << "\n"; + vk_destroy_descriptor_set_layout(user_context, allocator, cache_entry->descriptor_set_layouts[n]); + cache_entry->descriptor_set_layouts[n] = {0}; + } + vk_host_free(user_context, cache_entry->descriptor_set_layouts, allocator->callbacks()); + cache_entry->descriptor_set_layouts = nullptr; + } + if (cache_entry->pipeline_layout) { + debug(user_context) << " destroying pipeline layout " << (void *)cache_entry->pipeline_layout << "\n"; + vk_destroy_pipeline_layout(user_context, allocator, cache_entry->pipeline_layout); + cache_entry->pipeline_layout = {0}; + } + if (cache_entry->shader_bindings) { + for (uint32_t n = 0; n < cache_entry->shader_count; n++) { + if (cache_entry->shader_bindings[n].args_region) { + vk_destroy_scalar_uniform_buffer(user_context, allocator, cache_entry->shader_bindings[n].args_region); + cache_entry->shader_bindings[n].args_region = nullptr; + } + if (cache_entry->shader_bindings[n].descriptor_pool) { + vk_destroy_descriptor_pool(user_context, allocator, cache_entry->shader_bindings[n].descriptor_pool); + cache_entry->shader_bindings[n].descriptor_pool = {0}; + } + if (cache_entry->shader_bindings[n].specialization_constants) { + vk_host_free(user_context, cache_entry->shader_bindings[n].specialization_constants, allocator->callbacks()); + cache_entry->shader_bindings[n].specialization_constants = nullptr; + } + if (cache_entry->shader_bindings[n].shared_memory_allocations) { + vk_host_free(user_context, cache_entry->shader_bindings[n].shared_memory_allocations, allocator->callbacks()); + cache_entry->shader_bindings[n].shared_memory_allocations = nullptr; + } + if (cache_entry->shader_bindings[n].compute_pipeline) { + vk_destroy_compute_pipeline(user_context, allocator, cache_entry->shader_bindings[n].compute_pipeline); + cache_entry->shader_bindings[n].compute_pipeline = {0}; + } + } + + vk_host_free(user_context, cache_entry->shader_bindings, allocator->callbacks()); + cache_entry->shader_bindings = nullptr; + } + if 
(cache_entry->shader_module) {
+                    debug(user_context) << " destroying shader module " << (void *)cache_entry->shader_module << "\n";
+                    vkDestroyShaderModule(allocator->current_device(), cache_entry->shader_module, allocator->callbacks());
+                    cache_entry->shader_module = {0};
+                }
+                cache_entry->shader_count = 0;
+                vk_host_free(user_context, cache_entry, allocator->callbacks());
+                cache_entry = nullptr;
+            }
+        }
+    };
+
+    DestroyShaderModule module_destructor(user_context, allocator);
+    compilation_cache.delete_context(user_context, allocator->current_device(), module_destructor);
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+    return halide_error_code_success;
+}
+
+// --------------------------------------------------------------------------
+
+int vk_do_multidimensional_copy(void *user_context, VkCommandBuffer command_buffer,
+                                const device_copy &c, uint64_t src_offset, uint64_t dst_offset,
+                                int d, bool from_host, bool to_host) {
+    if (d == 0) {
+
+        if ((!from_host && to_host) ||
+            (from_host && !to_host) ||
+            (!from_host && !to_host)) {
+
+            VkBufferCopy buffer_copy = {
+                c.src_begin + src_offset,  // srcOffset
+                dst_offset,                // dstOffset
+                c.chunk_size               // size
+            };
+
+            VkBuffer *src_buffer = reinterpret_cast<VkBuffer *>(c.src);
+            VkBuffer *dst_buffer = reinterpret_cast<VkBuffer *>(c.dst);
+            if (!src_buffer || !dst_buffer) {
+                error(user_context) << "Vulkan: Failed to retrieve buffer for device memory!\n";
+                return halide_error_code_internal_error;
+            }
+
+            vkCmdCopyBuffer(command_buffer, *src_buffer, *dst_buffer, 1, &buffer_copy);
+
+        } else if ((c.dst + dst_offset) != (c.src + src_offset)) {
+            // Could reach here if a user called directly into the
+            // Vulkan API for a device->host copy on a source buffer
+            // with device_dirty = false.
+            memcpy((void *)(c.dst + dst_offset), (void *)(c.src + src_offset), c.chunk_size);
+        }
+    } else {
+        // TODO: deal with negative strides. Currently the code in
+        // device_buffer_utils.h does not do so either.
+        uint64_t src_off = 0, dst_off = 0;
+        for (uint64_t i = 0; i < c.extent[d - 1]; i++) {
+            int err = vk_do_multidimensional_copy(user_context, command_buffer, c,
+                                                  src_offset + src_off,
+                                                  dst_offset + dst_off,
+                                                  d - 1, from_host, to_host);
+            dst_off += c.dst_stride_bytes[d - 1];
+            src_off += c.src_stride_bytes[d - 1];
+            if (err) {
+                return err;
+            }
+        }
+    }
+    return halide_error_code_success;
+}
+
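+// For illustration: a copy invoked with d == 1 and extent[0] == 4 issues four
+// chunk copies (one vkCmdCopyBuffer, or host memcpy, per chunk), advancing the
+// src/dst offsets by the corresponding stride in bytes after each one.
+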
+int vk_device_crop_from_offset(void *user_context,
+                               const struct halide_buffer_t *src,
+                               int64_t offset,
+                               struct halide_buffer_t *dst) {
+
+    VulkanContext ctx(user_context);
+    if (ctx.error != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to acquire context!\n";
+        return ctx.error;
+    }
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_before = halide_current_time_ns(user_context);
+#endif
+
+    if (offset < 0) {
+        error(user_context) << "Vulkan: Invalid offset for device crop!\n";
+        return halide_error_code_device_crop_failed;
+    }
+
+    // get the allocated region for the device
+    MemoryRegion *device_region = reinterpret_cast<MemoryRegion *>(src->device);
+    if (device_region == nullptr) {
+        error(user_context) << "Vulkan: Failed to crop region! Invalid device region!\n";
+        return halide_error_code_device_crop_failed;
+    }
+
+    // create the cropped region from the allocated region
+    MemoryRegion *cropped_region = ctx.allocator->create_crop(user_context, device_region, (uint64_t)offset);
+    if ((cropped_region == nullptr) || (cropped_region->handle == nullptr)) {
+        error(user_context) << "Vulkan: Failed to crop region! Unable to create memory region!\n";
+        return halide_error_code_device_crop_failed;
+    }
+
+    // update the destination to the cropped region
+    dst->device = (uint64_t)cropped_region;
+    dst->device_interface = src->device_interface;
+
+#ifdef DEBUG_RUNTIME
+    uint64_t t_after = halide_current_time_ns(user_context);
+    debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
+#endif
+
+    return halide_error_code_success;
+}
+
+// --------------------------------------------------------------------------
+
+}  // namespace
+}  // namespace Vulkan
+}  // namespace Internal
+}  // namespace Runtime
+}  // namespace Halide
+
+#endif  // HALIDE_RUNTIME_VULKAN_RESOURCES_H
diff --git a/src/runtime/windows_vulkan.cpp b/src/runtime/windows_vulkan.cpp
new file mode 100644
index 000000000000..cd12ca0aa74b
--- /dev/null
+++ b/src/runtime/windows_vulkan.cpp
@@ -0,0 +1,2 @@
+#define WINDOWS
+#include "vulkan.cpp"
diff --git a/test/common/gpu_object_lifetime_tracker.h b/test/common/gpu_object_lifetime_tracker.h
index d8f18716261c..436c44b6262c 100644
--- a/test/common/gpu_object_lifetime_tracker.h
+++ b/test/common/gpu_object_lifetime_tracker.h
@@ -22,7 +22,7 @@ class GpuObjectLifetimeTracker {
         }
     };
 
-    std::array object_types = {{
+    std::array object_types = {{
         {"Caching compiled kernel:", "Releasing cached compilation:"},
 
         // OpenCL objects
@@ -45,6 +45,16 @@ class GpuObjectLifetimeTracker {
         {"halide_remote_load_library", "halide_remote_release_library"},
         {"ion_alloc", "ion_free"},
 
+        // Vulkan objects
+        {"vk_create_context", "vk_destroy_context", true},
+        {"vk_create_command_pool", "vk_destroy_command_pool"},
+        {"vk_create_command_buffer", "vk_destroy_command_buffer"},
+        {"vk_create_pipeline_layout", "vk_destroy_pipeline_layout"},
+        {"vk_create_compute_pipeline", "vk_destroy_compute_pipeline"},
+        {"vk_create_descriptor_pool", "vk_destroy_descriptor_pool"},
+        {"Vulkan: Allocated memory for device region", "Vulkan: Deallocated memory for device region"},
+        {"Vulkan: Created buffer", "Vulkan: Destroyed buffer"},
+
         // WebGPU objects
         {"wgpuCreateInstance", "wgpuInstanceRelease", true},
         {"wgpuDeviceCreateBuffer", "wgpuBufferRelease"},
diff --git a/test/correctness/boundary_conditions.cpp b/test/correctness/boundary_conditions.cpp
index 03d12453295d..fec8c47b1192 100644
--- a/test/correctness/boundary_conditions.cpp
+++ b/test/correctness/boundary_conditions.cpp
@@ -385,6 +385,7 @@ int main(int argc, char **argv) {
     int vector_width_max = 32;
     if (target.has_feature(Target::Metal) ||
+        target.has_feature(Target::Vulkan) ||
         target.has_feature(Target::OpenGLCompute) ||
         target.has_feature(Target::D3D12Compute) ||
         target.has_feature(Target::WebGPU)) {
diff --git a/test/correctness/gpu_allocation_cache.cpp b/test/correctness/gpu_allocation_cache.cpp
index 97c2e91322fb..cbb864bd6409 100644
--- a/test/correctness/gpu_allocation_cache.cpp
+++ b/test/correctness/gpu_allocation_cache.cpp
@@ -24,11 +24,14 @@ int main(int argc, char **argv) {
         printf("[SKIP] Allocation cache not yet implemented for D3D12Compute.\n");
         return 0;
     }
+    if (target.has_feature(Target::Vulkan) && ((target.os == Target::IOS) || target.os == Target::OSX)) {
+        printf("[SKIP] 
Skipping test for Vulkan on iOS/OSX (MoltenVK only allows 30 buffers to be allocated)!\n"); + return 0; + } if (target.has_feature(Target::WebGPU)) { printf("[SKIP] Allocation cache not yet implemented for WebGPU.\n"); return 0; } - const int N = 30; Var x, y, xi, yi; @@ -154,22 +157,26 @@ int main(int argc, char **argv) { } } - // Now benchmark with and without, (just informational, as this isn't a performance test) - double t1 = Tools::benchmark([&]() { - test1(true, false); - test2(true, false); - test3(true, false); - }); - - double t2 = Tools::benchmark([&]() { - test1(false, false); - test2(false, false); - test3(false, false); - }); - - printf("Runtime with cache: %f\n" - "Without cache: %f\n", - t1, t2); + // Vulkan will OOM unless allocation cache is used ... skip this since we just ran the same tests above concurrently + if (!target.has_feature(Target::Vulkan)) { + + // Now benchmark with and without, (just informational, as this isn't a performance test) + double t1 = Tools::benchmark([&]() { + test1(true, false); + test2(true, false); + test3(true, false); + }); + + double t2 = Tools::benchmark([&]() { + test1(false, false); + test2(false, false); + test3(false, false); + }); + + printf("Runtime with cache: %f\n" + "Without cache: %f\n", + t1, t2); + } printf("Success!\n"); return 0; diff --git a/test/correctness/gpu_dynamic_shared.cpp b/test/correctness/gpu_dynamic_shared.cpp index 1af9b3cd25be..cdb229beca19 100644 --- a/test/correctness/gpu_dynamic_shared.cpp +++ b/test/correctness/gpu_dynamic_shared.cpp @@ -15,6 +15,21 @@ int main(int argc, char **argv) { return 0; } + if (t.has_feature(Target::Vulkan)) { + const auto *interface = get_device_interface_for_device_api(DeviceAPI::Vulkan); + assert(interface->compute_capability != nullptr); + int major, minor; + int err = interface->compute_capability(nullptr, &major, &minor); + if (err != 0 || (major == 1 && minor < 2)) { + printf("[SKIP] Vulkan %d.%d is less than required 1.2.\n", major, minor); + return 0; + } + if ((t.os == Target::IOS) || (t.os == Target::OSX)) { + printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK doesn't support dynamic LocalSizeId yet)!\n"); + return 0; + } + } + // Check dynamic allocations per-block and per-thread into both // shared and global for (int per_thread = 0; per_thread < 2; per_thread++) { diff --git a/test/correctness/gpu_reuse_shared_memory.cpp b/test/correctness/gpu_reuse_shared_memory.cpp index a557e08ed58f..422775ac2021 100644 --- a/test/correctness/gpu_reuse_shared_memory.cpp +++ b/test/correctness/gpu_reuse_shared_memory.cpp @@ -191,6 +191,8 @@ int main(int argc, char **argv) { printf("Running dynamic shared test\n"); if (t.has_feature(Target::OpenGLCompute) && memory_type == MemoryType::GPUShared) { printf("Skipping test because GL doesn't support dynamic sizes for shared memory\n"); + } else if (t.has_feature(Target::Vulkan) && ((t.os == Target::IOS) || t.os == Target::OSX)) { + printf("Skipping test for Vulkan on iOS/OSX (MoltenVK doesn't support dynamic sizes for shared memory)!\n"); } else { if (dynamic_shared_test(memory_type) != 0) { return 1; diff --git a/test/correctness/gpu_specialize.cpp b/test/correctness/gpu_specialize.cpp index 5fd04829d362..9cae395a0082 100644 --- a/test/correctness/gpu_specialize.cpp +++ b/test/correctness/gpu_specialize.cpp @@ -4,10 +4,15 @@ using namespace Halide; int main(int argc, char **argv) { - if (!get_jit_target_from_environment().has_gpu_feature()) { + Halide::Target target = get_jit_target_from_environment(); + if 
(!target.has_gpu_feature()) { printf("[SKIP] No GPU target enabled.\n"); return 0; } + if (target.has_feature(Target::Vulkan) && ((target.os == Target::IOS) || target.os == Target::OSX)) { + printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK doesn't support dynamically allocated shared mem)!\n"); + return 0; + } { Func f("f"), g("g"), h("h"); diff --git a/test/correctness/interleave_rgb.cpp b/test/correctness/interleave_rgb.cpp index 54988b5cb3b4..3a679a239035 100644 --- a/test/correctness/interleave_rgb.cpp +++ b/test/correctness/interleave_rgb.cpp @@ -112,7 +112,6 @@ int main(int argc, char **argv) { if (!test_deinterleave(x_stride)) return 1; if (!test_deinterleave(x_stride)) return 1; } - printf("Success!\n"); return 0; } diff --git a/test/correctness/interpreter.cpp b/test/correctness/interpreter.cpp index e4d578cc0e70..832197387079 100644 --- a/test/correctness/interpreter.cpp +++ b/test/correctness/interpreter.cpp @@ -13,6 +13,11 @@ int main(int argc, char **argv) { return 0; } + if (target.has_feature(Target::Vulkan)) { + printf("[SKIP] Skipping test for Vulkan (which doesn't support dynamically allocated shared mem)!\n"); + return 0; + } + // Workaround for https://github.com/halide/Halide/issues/7420 if (target.has_feature(Target::WebGPU)) { printf("[SKIP] workaround for issue #7420\n"); @@ -160,7 +165,7 @@ int main(int argc, char **argv) { uint8_t correct = (uint8_t)(((int)in_buf(x + 1, y) - in_buf(x - 1, y)) >> 1); if (out_buf(x, y) != correct) { printf("out_buf(%d, %d) = %d instead of %d\n", x, y, out_buf(x, y), correct); - return 1; + return -1; } } } @@ -189,7 +194,7 @@ int main(int argc, char **argv) { uint8_t correct = (uint8_t)((int)std::floor(std::sqrt(a * a + b * b))); if (out_buf(x, y) != correct) { printf("out_buf(%d, %d) = %d instead of %d\n", x, y, out_buf(x, y), correct); - return 1; + return -1; } } } diff --git a/test/correctness/math.cpp b/test/correctness/math.cpp index 07480f2c8b39..618a30ea104a 100644 --- a/test/correctness/math.cpp +++ b/test/correctness/math.cpp @@ -124,57 +124,65 @@ struct TestArgs { // for another day. // Version for a one argument function. 
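The macro rewrites in the hunks below both add the same early-out: on Vulkan targets, any test whose element type is a float wider than 32 bits is skipped, matching the "Vulkan lacks trig functions for 64-bit floats" rationale given in the newtons_method.cpp hunk further down. A minimal standalone sketch of that guard, with a hypothetical helper name (not part of the patch):

#include "Halide.h"
using namespace Halide;

// Hypothetical helper mirroring the skip predicate the macros inline below:
// Vulkan's Float64 support is optional and the math builtins these tests
// exercise are not guaranteed for 64-bit floats, so those cases bail out.
template<typename T>
bool skip_math_test_for_vulkan(const Target &target) {
    Type t = type_of<T>();
    return target.has_feature(Target::Vulkan) && t.is_float() && t.bits() > 32;
}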
-#define fun_1(type_ret, type, name, c_name) \ - void test_##type##_##name(Buffer<type> in) { \ - Target target = get_jit_target_from_environment(); \ - if (!target.supports_type(type_of<type>())) { \ - return; \ - } \ - Func test_##name("test_" #name); \ - Var x("x"), xi("xi"); \ - test_##name(x) = name(in(x)); \ - if (target.has_gpu_feature()) { \ - test_##name.gpu_tile(x, xi, 8); \ - } else if (target.has_feature(Target::HVX)) { \ - test_##name.hexagon(); \ - } \ - Buffer<type_ret> result = test_##name.realize({in.extent(0)}, target); \ - for (int i = 0; i < in.extent(0); i++) { \ - type_ret c_result = c_name(in(i)); \ - if (!relatively_equal(c_result, result(i), target)) { \ - fprintf(stderr, "For " #name "(%.20f) == %.20f from C and %.20f from %s.\n", \ - (double)in(i), (double)c_result, (double)result(i), \ - target.to_string().c_str()); \ - num_errors++; \ - } \ - } \ +#define fun_1(type_ret, type, name, c_name) \ + void test_##type##_##name(Buffer<type> in) { \ + Type type_of_type = type_of<type>(); \ + Target target = get_jit_target_from_environment(); \ + if (!target.supports_type(type_of_type)) { \ + return; \ + } \ + if (target.has_feature(Target::Vulkan) && (type_of_type.is_float() && type_of_type.bits() > 32)) { \ + return; \ + } \ + Func test_##name("test_" #name); \ + Var x("x"), xi("xi"); \ + test_##name(x) = name(in(x)); \ + if (target.has_gpu_feature()) { \ + test_##name.gpu_tile(x, xi, 8); \ + } else if (target.has_feature(Target::HVX)) { \ + test_##name.hexagon(); \ + } \ + Buffer<type_ret> result = test_##name.realize({in.extent(0)}, target); \ + for (int i = 0; i < in.extent(0); i++) { \ + type_ret c_result = c_name(in(i)); \ + if (!relatively_equal(c_result, result(i), target)) { \ + fprintf(stderr, "For " #name "(%.20f) == %.20f from C and %.20f from %s.\n", \ + (double)in(i), (double)c_result, (double)result(i), \ + target.to_string().c_str()); \ + num_errors++; \ + } \ + } \ } // Version for a two argument function -#define fun_2(type_ret, type, name, c_name) \ - void test_##type##_##name(Buffer<type> in) { \ - Target target = get_jit_target_from_environment(); \ - if (!target.supports_type(type_of<type>())) { \ - return; \ - } \ - Func test_##name("test_" #name); \ - Var x("x"), xi("xi"); \ - test_##name(x) = name(in(0, x), in(1, x)); \ - if (target.has_gpu_feature()) { \ - test_##name.gpu_tile(x, xi, 8); \ - } else if (target.has_feature(Target::HVX)) { \ - test_##name.hexagon(); \ - } \ - Buffer<type_ret> result = test_##name.realize({in.height()}, target); \ - for (int i = 0; i < in.height(); i++) { \ - type_ret c_result = c_name(in(0, i), in(1, i)); \ - if (!relatively_equal(c_result, result(i), target)) { \ - fprintf(stderr, "For " #name "(%.20f, %.20f) == %.20f from C and %.20f from %s.\n", \ - (double)in(0, i), (double)in(1, i), (double)c_result, (double)result(i), \ - target.to_string().c_str()); \ - num_errors++; \ - } \ - } \ +#define fun_2(type_ret, type, name, c_name) \ + void test_##type##_##name(Buffer<type> in) { \ + Type type_of_type = type_of<type>(); \ + Target target = get_jit_target_from_environment(); \ + if (!target.supports_type(type_of_type)) { \ + return; \ + } \ + if (target.has_feature(Target::Vulkan) && (type_of_type.is_float() && type_of_type.bits() > 32)) { \ + return; \ + } \ + Func test_##name("test_" #name); \ + Var x("x"), xi("xi"); \ + test_##name(x) = name(in(0, x), in(1, x)); \ + if (target.has_gpu_feature()) { \ + test_##name.gpu_tile(x, xi, 8); \ + } else if (target.has_feature(Target::HVX)) { \ + test_##name.hexagon(); \ + } \ + Buffer<type_ret> result = test_##name.realize({in.height()}, target); \ + for (int i = 0; i < in.height(); i++) { \ + type_ret c_result = c_name(in(0, i), in(1, i)); \ + if (!relatively_equal(c_result, result(i), target)) { \ + fprintf(stderr, "For " #name "(%.20f, %.20f) == %.20f from C and %.20f from %s.\n", \ + (double)in(0, i), (double)in(1, i), (double)c_result, (double)result(i), \ + target.to_string().c_str()); \ + num_errors++; \ + } \ + } \ } // clang-format off diff --git a/test/correctness/mul_div_mod.cpp b/test/correctness/mul_div_mod.cpp index ce1d1e13abea..f4f41c8fc9f8 100644 --- a/test/correctness/mul_div_mod.cpp +++ b/test/correctness/mul_div_mod.cpp @@ -551,6 +551,7 @@ int main(int argc, char **argv) { std::vector<int> vector_widths = {1}; if (target.has_feature(Target::Metal) || target.has_feature(Target::D3D12Compute) || + target.has_feature(Target::Vulkan) || target.has_feature(Target::WebGPU)) { for (int i = 2; i <= 4; i *= 2) { vector_widths.push_back(i); @@ -579,7 +580,7 @@ int main(int argc, char **argv) { if (!sharder.should_run(t)) continue; const auto &task = tasks.at(t); if (!task.fn()) { - exit(1); + exit(-1); } } diff --git a/test/correctness/newtons_method.cpp b/test/correctness/newtons_method.cpp index 0152f40ea776..bdd8652b28a9 100644 --- a/test/correctness/newtons_method.cpp +++ b/test/correctness/newtons_method.cpp @@ -10,8 +10,14 @@ using namespace Halide; template<typename T> int find_pi() { // Skip test if data type is not supported by the target. + Type type = type_of<T>(); Target target = get_jit_target_from_environment(); - if (!target.supports_type(type_of<T>())) { + if (!target.supports_type(type)) { + return 0; + } + + // Vulkan lacks trig functions for 64-bit floats ... skip + if (target.has_feature(Target::Vulkan) && (type.is_float() && type.bits() > 32)) { return 0; } @@ -53,8 +59,9 @@ int find_pi() { T secant_result = evaluate_may_gpu<T>(g()[0]); - // Trig in openglcompute/d3d12 is approximate - float tolerance = target.has_feature(Target::OpenGLCompute) || + // Trig in vulkan/openglcompute/d3d12 is approximate + float tolerance = target.has_feature(Target::Vulkan) || + target.has_feature(Target::OpenGLCompute) || target.has_feature(Target::D3D12Compute) ? 1e-5f : 1e-20f; diff --git a/test/correctness/round.cpp b/test/correctness/round.cpp index 5417940a0534..9cf3d78f625c 100644 --- a/test/correctness/round.cpp +++ b/test/correctness/round.cpp @@ -15,6 +15,13 @@ bool test(Expr e, const char *funcname, int vector_width, int N, Buffer<T> &inpu printf("(Target does not support (%s x %d), skipping...)\n", type_of<T>() == Float(32) ? 
"float" : "double", vector_width); return true; } + if (e.type() == Float(64) && + ((t.has_feature(Target::OpenCL) && !t.has_feature(Target::CLDoubles)) || + t.has_feature(Target::Vulkan) || + t.has_feature(Target::Metal) || + t.has_feature(Target::D3D12Compute))) { + return true; + } f.gpu_single_thread(); } else if (vector_width > 1) { f.vectorize(x, vector_width); diff --git a/test/correctness/simd_op_check.h b/test/correctness/simd_op_check.h index 29bce1207a85..d97f2c72b90c 100644 --- a/test/correctness/simd_op_check.h +++ b/test/correctness/simd_op_check.h @@ -373,6 +373,10 @@ class SimdOpCheckTest { std::cout << "simd_op_check test seed: " << seed << "\n"; for (const auto &t : targets_to_test) { + if (!t.supported()) { + std::cout << "[SKIP] Unsupported target: " << t << "\n"; + return 0; + } SIMDOpCheckT test(t); if (!t.supported()) { diff --git a/test/correctness/vector_cast.cpp b/test/correctness/vector_cast.cpp index ff4a60d92e6e..3b6eae0fa2e6 100644 --- a/test/correctness/vector_cast.cpp +++ b/test/correctness/vector_cast.cpp @@ -30,6 +30,13 @@ bool is_type_supported(int vec_width, const Target &target) { if (target.has_feature(Target::HVX)) { device = DeviceAPI::Hexagon; } + if (target.has_feature(Target::Vulkan)) { + if (type_of() == Float(64)) { + if ((target.os == Target::OSX || target.os == Target::IOS)) { + return false; // MoltenVK doesn't support Float64 + } + } + } return target.supports_type(type_of().with_lanes(vec_width), device); } diff --git a/test/correctness/widening_reduction.cpp b/test/correctness/widening_reduction.cpp index 6a99beaaa670..43b79486e03a 100644 --- a/test/correctness/widening_reduction.cpp +++ b/test/correctness/widening_reduction.cpp @@ -7,6 +7,13 @@ using namespace Halide::ConciseCasts; using namespace Halide::Internal; int main(int arch, char **argv) { + + Halide::Target target = get_jit_target_from_environment(); + if (target.has_feature(Target::Vulkan) && ((target.os == Target::IOS) || target.os == Target::OSX)) { + printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK fails to convert max/min intrinsics correctly)!\n"); + return 0; + } + const int W = 256, H = 256; Buffer in(W, H); @@ -39,7 +46,6 @@ int main(int arch, char **argv) { f(x, y) = u8_sat(sum(i16(input(x + r.x, y + r.y)) * kernel(r.x, r.y)) / 16); // Schedule. - Target target = get_jit_target_from_environment(); if (target.has_gpu_feature()) { f.gpu_tile(x, y, xi, yi, 16, 16); } else if (target.has_feature(Target::HVX)) { @@ -81,7 +87,6 @@ int main(int arch, char **argv) { g(x, y) = u8_sat((f(x, y)[0] + f(x, y)[1]) / 16); // Schedule. - Target target = get_jit_target_from_environment(); if (target.has_gpu_feature()) { g.gpu_tile(x, y, xi, yi, 16, 16); } else if (target.has_feature(Target::HVX)) { @@ -122,7 +127,6 @@ int main(int arch, char **argv) { g(x, y) = u8_sat((f(x, y) + f(x + 1, y)) / 2); // Schedule. 
- Target target = get_jit_target_from_environment(); if (target.has_gpu_feature()) { g.gpu_tile(x, y, xi, yi, 16, 16); } else if (target.has_feature(Target::HVX)) { diff --git a/test/generator/CMakeLists.txt b/test/generator/CMakeLists.txt index a294cddc3e79..78328685fcd5 100644 --- a/test/generator/CMakeLists.txt +++ b/test/generator/CMakeLists.txt @@ -125,6 +125,9 @@ function(_add_one_aot_test TARGET) if ("${Halide_TARGET}" MATCHES "cuda") target_compile_definitions("${TARGET}" PRIVATE TEST_CUDA) endif () + if ("${Halide_TARGET}" MATCHES "vulkan") + target_compile_definitions("${TARGET}" PRIVATE TEST_VULKAN) + endif () if ("${Halide_TARGET}" MATCHES "webgpu") target_compile_definitions("${TARGET}" PRIVATE TEST_WEBGPU) target_include_directories("${TARGET}" PRIVATE ${args_INCLUDES} "${Halide_SOURCE_DIR}/src/runtime") @@ -380,7 +383,8 @@ _add_halide_aot_tests(float16_t) # gpu_multi_context_threaded_aottest.cpp # gpu_multi_context_threaded_generator.cpp # (Doesn't build/link properly under wasm, and isn't useful there anyway) -if (NOT ${_USING_WASM}) +# (Vulkan doesn't build/link properly and adding custom context creation is too much effort) +if ((NOT ${_USING_WASM}) AND (NOT Halide_TARGET MATCHES "vulkan")) add_halide_generator(gpu_multi_context_threaded.generator SOURCES gpu_multi_context_threaded_generator.cpp) _add_halide_libraries(gpu_multi_context_threaded_add diff --git a/test/performance/async_gpu.cpp b/test/performance/async_gpu.cpp index ec5f487b07ab..9d78efe4022e 100644 --- a/test/performance/async_gpu.cpp +++ b/test/performance/async_gpu.cpp @@ -20,6 +20,11 @@ int main(int argc, char **argv) { return 0; } + if (target.has_feature(Target::Vulkan)) { + printf("[SKIP] Skipping test for Vulkan. Async performance needs to be improved before this test will pass.\n"); + return 0; + } + if (!target.has_gpu_feature()) { printf("[SKIP] No GPU target enabled.\n"); return 0; diff --git a/test/runtime/block_allocator.cpp b/test/runtime/block_allocator.cpp index 1dae18f31c28..b56c817e1f4e 100644 --- a/test/runtime/block_allocator.cpp +++ b/test/runtime/block_allocator.cpp @@ -13,7 +13,7 @@ namespace { size_t allocated_region_memory = 0; size_t allocated_block_memory = 0; -void allocate_block(void *user_context, MemoryBlock *block) { +int allocate_block(void *user_context, MemoryBlock *block) { block->handle = allocate_system(user_context, block->size); allocated_block_memory += block->size; @@ -22,9 +22,11 @@ void allocate_block(void *user_context, MemoryBlock *block) { << "block_size=" << int32_t(block->size) << " " << "allocated_block_memory=" << int32_t(allocated_block_memory) << " " << ") !\n"; + + return halide_error_code_success; } -void deallocate_block(void *user_context, MemoryBlock *block) { +int deallocate_block(void *user_context, MemoryBlock *block) { deallocate_system(user_context, block->handle); allocated_block_memory -= block->size; @@ -33,9 +35,11 @@ void deallocate_block(void *user_context, MemoryBlock *block) { << "block_size=" << int32_t(block->size) << " " << "allocated_block_memory=" << int32_t(allocated_block_memory) << " " << ") !\n"; + + return halide_error_code_success; } -void allocate_region(void *user_context, MemoryRegion *region) { +int allocate_region(void *user_context, MemoryRegion *region) { region->handle = (void *)1; allocated_region_memory += region->size; @@ -44,9 +48,11 @@ void allocate_region(void *user_context, MemoryRegion *region) { << "region_size=" << int32_t(region->size) << " " << "allocated_region_memory=" << int32_t(allocated_region_memory) 
<< " " << ") !\n"; + + return halide_error_code_success; } -void deallocate_region(void *user_context, MemoryRegion *region) { +int deallocate_region(void *user_context, MemoryRegion *region) { region->handle = (void *)0; allocated_region_memory -= region->size; @@ -55,6 +61,8 @@ void deallocate_region(void *user_context, MemoryRegion *region) { << "region_size=" << int32_t(region->size) << " " << "allocated_region_memory=" << int32_t(allocated_region_memory) << " " << ") !\n"; + + return halide_error_code_success; } } // end namespace @@ -94,6 +102,16 @@ int main(int argc, char **argv) { instance->reclaim(user_context, r1); HALIDE_CHECK(user_context, allocated_region_memory == (1 * request.size)); + MemoryRegion *r3 = instance->reserve(user_context, request); + halide_abort_if_false(user_context, r3 != nullptr); + halide_abort_if_false(user_context, allocated_block_memory == config.minimum_block_size); + halide_abort_if_false(user_context, allocated_region_memory == (2 * request.size)); + instance->retain(user_context, r3); + halide_abort_if_false(user_context, allocated_region_memory == (2 * request.size)); + instance->release(user_context, r3); + halide_abort_if_false(user_context, allocated_region_memory == (2 * request.size)); + instance->reclaim(user_context, r3); + instance->destroy(user_context); debug(user_context) << "Test : block_allocator::destroy (" << "allocated_block_memory=" << int32_t(allocated_block_memory) << " " @@ -112,7 +130,7 @@ int main(int argc, char **argv) { HALIDE_CHECK(user_context, get_allocated_system_memory() == 0); } - // stress test + // allocation stress test { BlockAllocator::Config config = {0}; config.minimum_block_size = 1024; @@ -151,6 +169,57 @@ int main(int argc, char **argv) { HALIDE_CHECK(user_context, get_allocated_system_memory() == 0); } + // reuse stress test + { + BlockAllocator::Config config = {0}; + config.minimum_block_size = 1024; + + BlockAllocator::MemoryAllocators allocators = {system_allocator, block_allocator, region_allocator}; + BlockAllocator *instance = BlockAllocator::create(user_context, config, allocators); + + MemoryRequest request = {0}; + request.size = sizeof(int); + request.alignment = sizeof(int); + request.properties.visibility = MemoryVisibility::DefaultVisibility; + request.properties.caching = MemoryCaching::DefaultCaching; + request.properties.usage = MemoryUsage::DefaultUsage; + + size_t total_allocation_size = 0; + static size_t test_allocations = 1000; + PointerTable pointers(user_context, test_allocations, system_allocator); + for (size_t n = 0; n < test_allocations; ++n) { + size_t count = n % 32; + count = count > 1 ? count : 1; + request.size = count * sizeof(int); + total_allocation_size += request.size; + MemoryRegion *region = instance->reserve(user_context, request); + pointers.append(user_context, region); + } + + for (size_t n = 0; n < pointers.size(); ++n) { + MemoryRegion *region = static_cast(pointers[n]); + instance->release(user_context, region); // release but don't destroy + } + pointers.clear(user_context); + halide_abort_if_false(user_context, allocated_region_memory >= total_allocation_size); + + // reallocate and reuse + for (size_t n = 0; n < test_allocations; ++n) { + size_t count = n % 32; + count = count > 1 ? 
count : 1; + request.size = count * sizeof(int); + MemoryRegion *region = instance->reserve(user_context, request); + pointers.append(user_context, region); + } + + pointers.destroy(user_context); + instance->destroy(user_context); + halide_abort_if_false(user_context, allocated_block_memory == 0); + + BlockAllocator::destroy(user_context, instance); + halide_abort_if_false(user_context, get_allocated_system_memory() == 0); + } + print(user_context) << "Success!\n"; return 0; }
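The block_allocator.cpp changes above switch the allocate/deallocate callbacks from void to int returns, so that failures can propagate as halide_error_code_t values instead of being dropped. A minimal sketch of a conforming callback pair, assuming the runtime-internal MemoryBlock type and the allocate_system/deallocate_system helpers this test already uses; the out-of-memory code on failure is an assumption, not part of the diff:

// Hypothetical user-supplied callbacks matching the new int-returning signatures.
int my_allocate_block(void *user_context, MemoryBlock *block) {
    block->handle = allocate_system(user_context, block->size);
    if (block->handle == nullptr) {
        return halide_error_code_out_of_memory;  // assumed failure code (not in the diff)
    }
    return halide_error_code_success;
}

int my_deallocate_block(void *user_context, MemoryBlock *block) {
    deallocate_system(user_context, block->handle);
    block->handle = nullptr;
    return halide_error_code_success;
}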