diff --git a/CMakeLists.txt b/CMakeLists.txt index d1e32a6..75728da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,23 +82,12 @@ if (NVRHI_WITH_NVAPI AND NOT TARGET nvapi) endif() if (NVRHI_WITH_AFTERMATH AND NOT TARGET aftermath) - find_package(Aftermath REQUIRED) - - if (AFTERMATH_FOUND) - add_library(aftermath SHARED IMPORTED GLOBAL) - target_include_directories(aftermath INTERFACE "${AFTERMATH_INCLUDE_DIR}") - if (WIN32) - set_property(TARGET aftermath PROPERTY IMPORTED_IMPLIB "${AFTERMATH_LIBRARY}") - endif() - set_property(TARGET aftermath PROPERTY IMPORTED_LOCATION "${AFTERMATH_RUNTIME_LIBRARY}") - file(COPY ${AFTERMATH_RUNTIME_LIBRARY} DESTINATION ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) - else() - message(FATAL_ERROR "NVRHI_WITH_AFTERMATH is enabled but cmake cannot find the Aftermath SDK in AFTERMATH_SEARCH_PATHS") - endif() + include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/FetchAftermath.cmake") endif() set(include_common include/nvrhi/nvrhi.h + include/nvrhi/nvrhiHLSL.h include/nvrhi/utils.h include/nvrhi/common/containers.h include/nvrhi/common/misc.h diff --git a/cmake/FetchAftermath.cmake b/cmake/FetchAftermath.cmake new file mode 100644 index 0000000..34e46b3 --- /dev/null +++ b/cmake/FetchAftermath.cmake @@ -0,0 +1,65 @@ +# +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +if( TARGET aftermath ) + return() +endif() + +if (NOT AFTERMATH_SEARCH_PATHS) + include(FetchContent) + if(WIN32) + set(AFTERMATH_SDK_URL https://developer.nvidia.com/downloads/assets/tools/secure/nsight-aftermath-sdk/2024_3_0/windows/NVIDIA_Nsight_Aftermath_SDK_2024.3.0.24312.zip) + set(AFTERMATH_SDK_MD5 232145E8A749F873B8EE32B9DA6F09D6) + else() + set(AFTERMATH_SDK_URL https://developer.nvidia.com/downloads/assets/tools/secure/nsight-aftermath-sdk/2024_3_0/linux/NVIDIA_Nsight_Aftermath_SDK_2024.3.0.24312.tgz) + set(AFTERMATH_SDK_MD5 C118C60F7A8D8302AF53513EA4FF1ABD) + endif() + + FetchContent_Declare( + aftermath_sdk + URL ${AFTERMATH_SDK_URL} + URL_HASH MD5=${AFTERMATH_SDK_MD5} + DOWNLOAD_EXTRACT_TIMESTAMP TRUE) + set(AFTERMATH_SEARCH_PATHS "${CMAKE_BINARY_DIR}/_deps/aftermath_sdk-src/") + FetchContent_MakeAvailable(aftermath_sdk) +endif() + +find_path(AFTERMATH_INCLUDE_DIR GFSDK_Aftermath.h + PATHS ${AFTERMATH_SEARCH_PATHS} + PATH_SUFFIXES "include") + +find_library(AFTERMATH_LIBRARY GFSDK_Aftermath_Lib.x64 + PATHS ${AFTERMATH_SEARCH_PATHS} + REQUIRED + PATH_SUFFIXES "lib/x64") + +add_library(aftermath SHARED IMPORTED) +target_include_directories(aftermath INTERFACE ${AFTERMATH_INCLUDE_DIR}) + +if(WIN32) + find_file(AFTERMATH_RUNTIME_LIBRARY GFSDK_Aftermath_Lib.x64.dll + PATHS ${AFTERMATH_SEARCH_PATHS} + PATH_SUFFIXES "lib/x64") + set_property(TARGET aftermath PROPERTY IMPORTED_LOCATION ${AFTERMATH_RUNTIME_LIBRARY}) + set_property(TARGET aftermath PROPERTY IMPORTED_IMPLIB 
${AFTERMATH_LIBRARY}) +else() + set_property(TARGET aftermath PROPERTY IMPORTED_LOCATION ${AFTERMATH_LIBRARY}) +endif() diff --git a/cmake/FindAftermath.cmake b/cmake/FindAftermath.cmake deleted file mode 100644 index 9b2ec28..0000000 --- a/cmake/FindAftermath.cmake +++ /dev/null @@ -1,80 +0,0 @@ -# -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. 
- - -find_package(PackageHandleStandardArgs) - - -if (NOT AFTERMATH_SEARCH_PATHS) - set (AFTERMATH_SEARCH_PATHS - "${CMAKE_SOURCE_DIR}/aftermath" - "${CMAKE_PROJECT_DIR}/aftermath") -endif() - -if (WIN32) - if (CMAKE_SIZEOF_VOID_P EQUAL 8) - find_library(AFTERMATH_LIBRARY GFSDK_Aftermath_Lib.x64 - PATHS ${AFTERMATH_SEARCH_PATHS} - PATH_SUFFIXES "lib/x64") - find_file(AFTERMATH_RUNTIME_LIBRARY GFSDK_Aftermath_Lib.x64.dll - PATHS ${AFTERMATH_SEARCH_PATHS} - PATH_SUFFIXES "lib/x64") - else() - find_library(AFTERMATH_LIBRARY GFSDK_Aftermath_Lib.x86 - PATHS ${AFTERMATH_SEARCH_PATHS} - PATH_SUFFIXES "lib/x86") - find_library(AFTERMATH_RUNTIME_LIBRARY GFSDK_Aftermath_Lib.x86.dll - PATHS ${AFTERMATH_SEARCH_PATHS} - PATH_SUFFIXES "lib/x86") - endif() -else() - if (CMAKE_SIZEOF_VOID_P EQUAL 8) - find_library(AFTERMATH_RUNTIME_LIBRARY libGFSDK_Aftermath_Lib.x64.so - PATHS ${AFTERMATH_SEARCH_PATHS} - PATH_SUFFIXES "lib/x64") - else() - find_library(AFTERMATH_RUNTIME_LIBRARY libGFSDK_Aftermath_Lib.x86.so - PATHS ${AFTERMATH_SEARCH_PATHS} - PATH_SUFFIXES "lib/x86") - endif() -endif() - -find_path(AFTERMATH_INCLUDE_DIR GFSDK_Aftermath.h - PATHS ${AFTERMATH_SEARCH_PATHS} - PATH_SUFFIXES "include") - -include(FindPackageHandleStandardArgs) - -if (WIN32) - find_package_handle_standard_args(Aftermath - REQUIRED_VARS - AFTERMATH_INCLUDE_DIR - AFTERMATH_LIBRARY - AFTERMATH_RUNTIME_LIBRARY - ) -else() - find_package_handle_standard_args(Aftermath - REQUIRED_VARS - AFTERMATH_INCLUDE_DIR - AFTERMATH_RUNTIME_LIBRARY - ) -endif() - diff --git a/include/nvrhi/common/resource.h b/include/nvrhi/common/resource.h index f935589..7b885d0 100644 --- a/include/nvrhi/common/resource.h +++ b/include/nvrhi/common/resource.h @@ -82,6 +82,7 @@ namespace nvrhi constexpr ObjectType VK_PipelineLayout = 0x00030012; constexpr ObjectType VK_Pipeline = 0x00030013; constexpr ObjectType VK_Micromap = 0x00030014; + constexpr ObjectType VK_ImageCreateInfo = 0x00030015; }; struct Object diff --git 
a/include/nvrhi/d3d12.h b/include/nvrhi/d3d12.h index be34d14..e6b6f7e 100644 --- a/include/nvrhi/d3d12.h +++ b/include/nvrhi/d3d12.h @@ -123,7 +123,17 @@ namespace nvrhi::d3d12 uint32_t shaderResourceViewHeapSize = 16384; uint32_t samplerHeapSize = 1024; uint32_t maxTimerQueries = 256; + + // If enabled and the device has the capability, + // create RootSignatures with D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED + // and D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED + bool enableHeapDirectlyIndexed = false; + bool aftermathEnabled = false; + + // Enable logging the buffer lifetime to IMessageCallback + // Useful for debugging resource lifetimes + bool logBufferLifetime = false; }; NVRHI_API DeviceHandle createDevice(const DeviceDesc& desc); diff --git a/include/nvrhi/nvrhi.h b/include/nvrhi/nvrhi.h index 69b0d6f..0b0c6fa 100644 --- a/include/nvrhi/nvrhi.h +++ b/include/nvrhi/nvrhi.h @@ -22,8 +22,10 @@ #pragma once + #include #include +#include #include #include @@ -735,6 +737,7 @@ namespace nvrhi { public: [[nodiscard]] virtual const BufferDesc& getDesc() const = 0; + [[nodiscard]] virtual GpuVirtualAddress getGpuVirtualAddress() const = 0; }; typedef RefCountPtr BufferHandle; @@ -827,21 +830,24 @@ namespace nvrhi float f; } value; - static ShaderSpecialization UInt32(uint32_t constantID, uint32_t u) { + static ShaderSpecialization UInt32(uint32_t constantID, uint32_t u) + { ShaderSpecialization s; s.constantID = constantID; s.value.u = u; return s; } - static ShaderSpecialization Int32(uint32_t constantID, int32_t i) { + static ShaderSpecialization Int32(uint32_t constantID, int32_t i) + { ShaderSpecialization s; s.constantID = constantID; s.value.i = i; return s; } - static ShaderSpecialization Float(uint32_t constantID, float f) { + static ShaderSpecialization Float(uint32_t constantID, float f) + { ShaderSpecialization s; s.constantID = constantID; s.value.f = f; @@ -1219,7 +1225,7 @@ namespace nvrhi SamplerDesc& setMinFilter(bool 
enable) { minFilter = enable; return *this; } SamplerDesc& setMagFilter(bool enable) { magFilter = enable; return *this; } SamplerDesc& setMipFilter(bool enable) { mipFilter = enable; return *this; } - SamplerDesc& setAllFilters (bool enable) { minFilter = magFilter = mipFilter = enable; return *this; } + SamplerDesc& setAllFilters(bool enable) { minFilter = magFilter = mipFilter = enable; return *this; } SamplerDesc& setAddressU(SamplerAddressMode mode) { addressU = mode; return *this; } SamplerDesc& setAddressV(SamplerAddressMode mode) { addressV = mode; return *this; } SamplerDesc& setAddressW(SamplerAddressMode mode) { addressW = mode; return *this; } @@ -1227,8 +1233,8 @@ namespace nvrhi SamplerDesc& setReductionType(SamplerReductionType type) { reductionType = type; return *this; } }; - class ISampler : public IResource - { + class ISampler : public IResource + { public: [[nodiscard]] virtual const SamplerDesc& getDesc() const = 0; }; @@ -1432,7 +1438,9 @@ namespace nvrhi enum class GeometryType : uint8_t { Triangles = 0, - AABBs = 1 + AABBs = 1, + Spheres = 2, + Lss = 3 }; struct GeometryAABB @@ -1447,10 +1455,10 @@ namespace nvrhi struct GeometryTriangles { - IBuffer* indexBuffer = nullptr; // make sure the first fields in both Triangles - IBuffer* vertexBuffer = nullptr; // and AABBs are IBuffer* for easier debugging + IBuffer* indexBuffer = nullptr; // make sure the first 2 fields in all Geometry + IBuffer* vertexBuffer = nullptr; // structs are IBuffer* for easier debugging Format indexFormat = Format::UNKNOWN; - Format vertexFormat = Format::UNKNOWN; + Format vertexFormat = Format::UNKNOWN; // See D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC for accepted formats and how they are interpreted uint64_t indexOffset = 0; uint64_t vertexOffset = 0; uint32_t indexCount = 0; @@ -1495,12 +1503,94 @@ namespace nvrhi GeometryAABBs& setStride(uint32_t value) { stride = value; return *this; } }; + struct GeometrySpheres + { + IBuffer* indexBuffer = nullptr; + IBuffer* 
vertexBuffer = nullptr; + Format indexFormat = Format::UNKNOWN; + Format vertexPositionFormat = Format::UNKNOWN; + Format vertexRadiusFormat = Format::UNKNOWN; + uint64_t indexOffset = 0; + uint64_t vertexPositionOffset = 0; + uint64_t vertexRadiusOffset = 0; + uint32_t indexCount = 0; + uint32_t vertexCount = 0; + uint32_t indexStride = 0; + uint32_t vertexPositionStride = 0; + uint32_t vertexRadiusStride = 0; + + GeometrySpheres& setIndexBuffer(IBuffer* value) { indexBuffer = value; return *this; } + GeometrySpheres& setVertexBuffer(IBuffer* value) { vertexBuffer = value; return *this; } + GeometrySpheres& setIndexFormat(Format value) { indexFormat = value; return *this; } + GeometrySpheres& setVertexPositionFormat(Format value) { vertexPositionFormat = value; return *this; } + GeometrySpheres& setVertexRadiusFormat(Format value) { vertexRadiusFormat = value; return *this; } + GeometrySpheres& setIndexOffset(uint64_t value) { indexOffset = value; return *this; } + GeometrySpheres& setVertexPositionOffset(uint64_t value) { vertexPositionOffset = value; return *this; } + GeometrySpheres& setVertexRadiusOffset(uint64_t value) { vertexRadiusOffset = value; return *this; } + GeometrySpheres& setIndexCount(uint32_t value) { indexCount = value; return *this; } + GeometrySpheres& setVertexCount(uint32_t value) { vertexCount = value; return *this; } + GeometrySpheres& setIndexStride(uint32_t value) { indexStride = value; return *this; } + GeometrySpheres& setVertexPositionStride(uint32_t value) { vertexPositionStride = value; return *this; } + GeometrySpheres& setVertexRadiusStride(uint32_t value) { vertexRadiusStride = value; return *this; } + }; + + enum class GeometryLssPrimitiveFormat : uint8_t + { + List = 0, + SuccessiveImplicit = 1 + }; + + enum class GeometryLssEndcapMode : uint8_t + { + None = 0, + Chained = 1 + }; + + struct GeometryLss + { + IBuffer* indexBuffer = nullptr; + IBuffer* vertexBuffer = nullptr; + Format indexFormat = Format::UNKNOWN; + Format 
vertexPositionFormat = Format::UNKNOWN; + Format vertexRadiusFormat = Format::UNKNOWN; + uint64_t indexOffset = 0; + uint64_t vertexPositionOffset = 0; + uint64_t vertexRadiusOffset = 0; + uint32_t indexCount = 0; + uint32_t primitiveCount = 0; + uint32_t vertexCount = 0; + uint32_t indexStride = 0; + uint32_t vertexPositionStride = 0; + uint32_t vertexRadiusStride = 0; + GeometryLssPrimitiveFormat primitiveFormat = GeometryLssPrimitiveFormat::List; + GeometryLssEndcapMode endcapMode = GeometryLssEndcapMode::None; + + GeometryLss& setIndexBuffer(IBuffer* value) { indexBuffer = value; return *this; } + GeometryLss& setVertexBuffer(IBuffer* value) { vertexBuffer = value; return *this; } + GeometryLss& setIndexFormat(Format value) { indexFormat = value; return *this; } + GeometryLss& setVertexPositionFormat(Format value) { vertexPositionFormat = value; return *this; } + GeometryLss& setVertexRadiusFormat(Format value) { vertexRadiusFormat = value; return *this; } + GeometryLss& setIndexOffset(uint64_t value) { indexOffset = value; return *this; } + GeometryLss& setVertexPositionOffset(uint64_t value) { vertexPositionOffset = value; return *this; } + GeometryLss& setVertexRadiusOffset(uint64_t value) { vertexRadiusOffset = value; return *this; } + GeometryLss& setIndexCount(uint32_t value) { indexCount = value; return *this; } + GeometryLss& setPrimitiveCount(uint32_t value) { primitiveCount = value; return *this; } + GeometryLss& setVertexCount(uint32_t value) { vertexCount = value; return *this; } + GeometryLss& setIndexStride(uint32_t value) { indexStride = value; return *this; } + GeometryLss& setVertexPositionStride(uint32_t value) { vertexPositionStride = value; return *this; } + GeometryLss& setVertexRadiusStride(uint32_t value) { vertexRadiusStride = value; return *this; } + GeometryLss& setPrimitiveFormat(GeometryLssPrimitiveFormat value) { primitiveFormat = value; return *this; } + GeometryLss& setEndcapMode(GeometryLssEndcapMode value) { endcapMode = value; 
return *this; } + }; + struct GeometryDesc { union GeomTypeUnion { GeometryTriangles triangles; GeometryAABBs aabbs; + GeometrySpheres spheres; + GeometryLss lss; } geometryData; bool useTransform = false; @@ -1514,6 +1604,8 @@ namespace nvrhi GeometryDesc& setFlags(GeometryFlags value) { flags = value; return *this; } GeometryDesc& setTriangles(const GeometryTriangles& value) { geometryData.triangles = value; geometryType = GeometryType::Triangles; return *this; } GeometryDesc& setAABBs(const GeometryAABBs& value) { geometryData.aabbs = value; geometryType = GeometryType::AABBs; return *this; } + GeometryDesc& setSpheres(const GeometrySpheres& value) { geometryData.spheres = value; geometryType = GeometryType::Spheres; return *this; } + GeometryDesc& setLss(const GeometryLss& value) { geometryData.lss = value; geometryType = GeometryType::Lss; return *this; } }; enum class InstanceFlags : unsigned @@ -1536,7 +1628,8 @@ namespace nvrhi unsigned instanceMask : 8; unsigned instanceContributionToHitGroupIndex : 24; InstanceFlags flags : 8; - union { + union + { IAccelStruct* bottomLevelAS; // for buildTopLevelAccelStruct uint64_t blasDeviceAddress; // for buildTopLevelAccelStructFromBuffer - use IAccelStruct::getDeviceAddress() }; @@ -1560,6 +1653,7 @@ namespace nvrhi }; static_assert(sizeof(InstanceDesc) == 64, "sizeof(InstanceDesc) is supposed to be 64 bytes"); + static_assert(sizeof(IndirectInstanceDesc) == sizeof(InstanceDesc)); enum class AccelStructBuildFlags : uint8_t { @@ -1611,6 +1705,139 @@ namespace nvrhi }; typedef RefCountPtr AccelStructHandle; + + + ////////////////////////////////////////////////////////////////////////// + // Clusters + ////////////////////////////////////////////////////////////////////////// + namespace cluster + { + enum class OperationType : uint8_t + { + Move, // Moves CLAS, CLAS Templates, or Cluster BLAS + ClasBuild, // Builds CLAS from clusters of triangles + ClasBuildTemplates, // Builds CLAS templates from triangles + 
ClasInstantiateTemplates, // Instantiates CLAS templates + BlasBuild // Builds Cluster BLAS from CLAS + }; + + enum class OperationMoveType : uint8_t + { + BottomLevel, // Moved objects are Clustered BLAS + ClusterLevel, // Moved objects are CLAS + Template // Moved objects are Cluster Templates + }; + + enum class OperationMode : uint8_t + { + ImplicitDestinations, // Provide total buffer space, driver places results within, returns VAs and actual sizes + ExplicitDestinations, // Provide individual target VAs, driver places them there, returns actual sizes + GetSizes // Get minimum size per element + }; + + enum class OperationFlags : uint8_t + { + None = 0x0, + FastTrace = 0x1, + FastBuild = 0x2, + NoOverlap = 0x4, + AllowOMM = 0x8 + }; + NVRHI_ENUM_CLASS_FLAG_OPERATORS(OperationFlags); + + enum class OperationIndexFormat : uint8_t + { + IndexFormat8bit = 1, + IndexFormat16bit = 2, + IndexFormat32bit = 4 + }; + + struct OperationSizeInfo + { + uint64_t resultMaxSizeInBytes = 0; + uint64_t scratchSizeInBytes = 0; + }; + + struct OperationMoveParams + { + OperationMoveType type; + uint32_t maxBytes = 0; + }; + + struct OperationClasBuildParams + { + // See D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC for accepted formats and how they are interpreted + Format vertexFormat = Format::RGB32_FLOAT; + + // Index of the last geometry in a single CLAS + uint32_t maxGeometryIndex = 0; + + // Maximum number of unique geometries in a single CLAS + uint32_t maxUniqueGeometryCount = 1; + + // Maximum number of triangles in a single CLAS + uint32_t maxTriangleCount = 0; + + // Maximum number of vertices in a single CLAS + uint32_t maxVertexCount = 0; + + // Maximum number of triangles summed over all CLAS (in the current cluster operation) + uint32_t maxTotalTriangleCount = 0; + + // Maximum number of vertices summed over all CLAS (in the current cluster operation) + uint32_t maxTotalVertexCount = 0; + + // Minimum number of bits to be truncated in vertex positions across all CLAS 
(in the current cluster operation) + uint32_t minPositionTruncateBitCount = 0; + }; + + struct OperationBlasBuildParams + { + // Maximum number of CLAS references in a single BLAS + uint32_t maxClasPerBlasCount = 0; + + // Maximum number of CLAS references summed over all BLAS (in the current cluster operation) + uint32_t maxTotalClasCount = 0; + }; + + // Params that can be used to getClusterOperationSizeInfo on this shared struct before passing to executeMultiIndirectClusterOperation + struct OperationParams + { + // Maximum number of acceleration structures (or templates) to build/instantiate/move + uint32_t maxArgCount = 0; + + OperationType type; + OperationMode mode; + OperationFlags flags; + + OperationMoveParams move; + OperationClasBuildParams clas; + OperationBlasBuildParams blas; + }; + + struct OperationDesc + { + OperationParams params; + + uint64_t scratchSizeInBytes = 0; // Size of scratch resource returned by getClusterOperationSizeInfo() scratchSizeInBytes + + // Input Resources + IBuffer* inIndirectArgCountBuffer = nullptr; // Buffer containing the number of AS to build, instantiate, or move + uint64_t inIndirectArgCountOffsetInBytes = 0; // Offset (in bytes) to where the count is in the inIndirectArgCountBuffer + IBuffer* inIndirectArgsBuffer = nullptr; // Buffer of descriptor array of format IndirectTriangleClasArgs, IndirectTriangleTemplateArgs, IndirectInstantiateTemplateArgs + uint64_t inIndirectArgsOffsetInBytes = 0; // Offset (in bytes) to where the descriptor array starts in inIndirectArgsBuffer + + // In/Out Resources + IBuffer* inOutAddressesBuffer = nullptr; // Array of addresses of CLAS, CLAS Templates, or BLAS + uint64_t inOutAddressesOffsetInBytes = 0; // Offset (in bytes) to where the addresses array starts in inOutAddressesBuffer + + // Output Resources + IBuffer* outSizesBuffer = nullptr; // Sizes (in bytes) of CLAS, CLAS Templates, or BLAS + uint64_t outSizesOffsetInBytes = 0; // Offset (in bytes) to where the output sizes array 
starts in outSizesBuffer + IBuffer* outAccelerationStructuresBuffer = nullptr; // Destination buffer for CLAS, CLAS Template, or BLAS data. Size must be calculated with getOperationSizeInfo or with the outSizesBuffer result of OperationMode::GetSizes + uint64_t outAccelerationStructuresOffsetInBytes = 0; // Offset (in bytes) to where the output acceleration structures starts in outAccelerationStructuresBuffer + }; + } // namespace cluster } ////////////////////////////////////////////////////////////////////////// @@ -1648,7 +1875,8 @@ namespace nvrhi uint8_t unused : 8; uint16_t size : 16; - bool operator ==(const BindingLayoutItem& b) const { + bool operator ==(const BindingLayoutItem& b) const + { return slot == b.slot && type == b.type && size == b.size; @@ -2071,6 +2299,7 @@ namespace nvrhi { public: [[nodiscard]] virtual uint32_t getCapacity() const = 0; + [[nodiscard]] virtual uint32_t getFirstDescriptorIndexInHeap() const = 0; }; typedef RefCountPtr DescriptorTableHandle; @@ -2098,7 +2327,8 @@ namespace nvrhi bool independentViewportMask = false; uint16_t renderTargetIndexOffset = 0; - bool operator ==(const SinglePassStereoState& b) const { + bool operator ==(const SinglePassStereoState& b) const + { return enabled == b.enabled && independentViewportMask == b.independentViewportMask && renderTargetIndexOffset == b.renderTargetIndexOffset; @@ -2107,7 +2337,7 @@ namespace nvrhi bool operator !=(const SinglePassStereoState& b) const { return !(*this == b); } constexpr SinglePassStereoState& setEnabled(bool value) { enabled = value; return *this; } - constexpr SinglePassStereoState& setIndependentViewportMask(bool value) { independentViewportMask= value; return *this; } + constexpr SinglePassStereoState& setIndependentViewportMask(bool value) { independentViewportMask = value; return *this; } constexpr SinglePassStereoState& setRenderTargetIndexOffset(uint16_t value) { renderTargetIndexOffset = value; return *this; } }; @@ -2151,7 +2381,8 @@ namespace nvrhi 
ShadingRateCombiner pipelinePrimitiveCombiner = ShadingRateCombiner::Passthrough; ShadingRateCombiner imageCombiner = ShadingRateCombiner::Passthrough; - bool operator ==(const VariableRateShadingState& b) const { + bool operator ==(const VariableRateShadingState& b) const + { return enabled == b.enabled && shadingRate == b.shadingRate && pipelinePrimitiveCombiner == b.pipelinePrimitiveCombiner @@ -2201,7 +2432,7 @@ namespace nvrhi GraphicsPipelineDesc& addBindingLayout(IBindingLayout* layout) { bindingLayouts.push_back(layout); return *this; } }; - class IGraphicsPipeline : public IResource + class IGraphicsPipeline : public IResource { public: [[nodiscard]] virtual const GraphicsPipelineDesc& getDesc() const = 0; @@ -2220,7 +2451,7 @@ namespace nvrhi ComputePipelineDesc& addBindingLayout(IBindingLayout* layout) { bindingLayouts.push_back(layout); return *this; } }; - class IComputePipeline : public IResource + class IComputePipeline : public IResource { public: [[nodiscard]] virtual const ComputePipelineDesc& getDesc() const = 0; @@ -2528,8 +2759,11 @@ namespace nvrhi RayTracingAccelStruct, RayTracingPipeline, RayTracingOpacityMicromap, + RayTracingClusters, RayQuery, ShaderExecutionReordering, + Spheres, + LinearSweptSpheres, FastGeometryShader, Meshlets, ConservativeRasterization, @@ -2538,7 +2772,8 @@ namespace nvrhi VirtualResources, ComputeQueue, CopyQueue, - ConstantBufferRanges + ConstantBufferRanges, + HeapDirectlyIndexed }; enum class MessageSeverity : uint8_t @@ -2663,6 +2898,7 @@ namespace nvrhi virtual void compactBottomLevelAccelStructs() = 0; virtual void buildTopLevelAccelStruct(rt::IAccelStruct* as, const rt::InstanceDesc* pInstances, size_t numInstances, rt::AccelStructBuildFlags buildFlags = rt::AccelStructBuildFlags::None) = 0; + virtual void executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc& desc) = 0; // A version of buildTopLevelAccelStruct that takes the instance data from a buffer on the GPU. 
// The buffer must be pre-filled with rt::InstanceDesc structures using a copy operation or a shader. @@ -2674,7 +2910,7 @@ namespace nvrhi virtual void endTimerQuery(ITimerQuery* query) = 0; // Command list range markers - virtual void beginMarker(const char *name) = 0; + virtual void beginMarker(const char* name) = 0; virtual void endMarker() = 0; // Enables or disables the automatic barrier placement on set[...]State, copy, write, and clear operations. @@ -2739,14 +2975,14 @@ namespace nvrhi virtual TextureHandle createHandleForNativeTexture(ObjectType objectType, Object texture, const TextureDesc& desc) = 0; virtual StagingTextureHandle createStagingTexture(const TextureDesc& d, CpuAccessMode cpuAccess) = 0; - virtual void *mapStagingTexture(IStagingTexture* tex, const TextureSlice& slice, CpuAccessMode cpuAccess, size_t *outRowPitch) = 0; + virtual void* mapStagingTexture(IStagingTexture* tex, const TextureSlice& slice, CpuAccessMode cpuAccess, size_t* outRowPitch) = 0; virtual void unmapStagingTexture(IStagingTexture* tex) = 0; virtual void getTextureTiling(ITexture* texture, uint32_t* numTiles, PackedMipDesc* desc, TileShape* tileShape, uint32_t* subresourceTilingsNum, SubresourceTiling* subresourceTilings) = 0; virtual void updateTextureTileMappings(ITexture* texture, const TextureTilesMapping* tileMappings, uint32_t numTileMappings, CommandQueue executionQueue = CommandQueue::Graphics) = 0; virtual BufferHandle createBuffer(const BufferDesc& d) = 0; - virtual void *mapBuffer(IBuffer* buffer, CpuAccessMode cpuAccess) = 0; + virtual void* mapBuffer(IBuffer* buffer, CpuAccessMode cpuAccess) = 0; virtual void unmapBuffer(IBuffer* buffer) = 0; virtual MemoryRequirements getBufferMemoryRequirements(IBuffer* buffer) = 0; virtual bool bindBufferMemory(IBuffer* buffer, IHeap* heap, uint64_t offset) = 0; @@ -2801,6 +3037,7 @@ namespace nvrhi virtual rt::OpacityMicromapHandle createOpacityMicromap(const rt::OpacityMicromapDesc& desc) = 0; virtual 
rt::AccelStructHandle createAccelStruct(const rt::AccelStructDesc& desc) = 0; virtual MemoryRequirements getAccelStructMemoryRequirements(rt::IAccelStruct* as) = 0; + virtual rt::cluster::OperationSizeInfo getClusterOperationSizeInfo(const rt::cluster::OperationParams& params) = 0; virtual bool bindAccelStructMemory(rt::IAccelStruct* as, IHeap* heap, uint64_t offset) = 0; virtual CommandListHandle createCommandList(const CommandListParameters& params = CommandListParameters()) = 0; diff --git a/include/nvrhi/nvrhiHLSL.h b/include/nvrhi/nvrhiHLSL.h new file mode 100644 index 0000000..8424941 --- /dev/null +++ b/include/nvrhi/nvrhiHLSL.h @@ -0,0 +1,131 @@ +/* +* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. 
+*/ + +#ifndef NVRHI_HLSL_H +#define NVRHI_HLSL_H + +// bit field defines +#if defined(__cplusplus) || __HLSL_VERSION >= 2021 || __SLANG__ +namespace nvrhi +{ + typedef uint64_t GpuVirtualAddress; + struct GpuVirtualAddressAndStride + { + GpuVirtualAddress startAddress; + uint64_t strideInBytes; + }; + + namespace rt + { + ////////////////////////////////////////////////////////////////////////// + // Indirect Arg Structs that are shader friendly + ////////////////////////////////////////////////////////////////////////// + + // Shader friendly equivalent of nvrhi::rt::InstanceDesc + struct IndirectInstanceDesc + { +#ifdef __cplusplus + float transform[12]; +#else + float4 transform[3]; +#endif + uint32_t instanceID : 24; + uint32_t instanceMask : 8; + uint32_t instanceContributionToHitGroupIndex : 24; + uint32_t flags : 8; + GpuVirtualAddress blasDeviceAddress; + }; + + namespace cluster + { + static const uint32_t kClasByteAlignment = 128; + static const uint32_t kClasMaxTriangles = 256; // Defined by spec + static const uint32_t kClasMaxVertices = 256; // Defined by spec + static const uint32_t kMaxGeometryIndex = 16777215; // Defined by spec + + // Clone of NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_CLUSTER_ARGS + struct IndirectTriangleClasArgs + { + uint32_t clusterId; // The user specified cluster Id to encode in the CLAS + uint32_t clusterFlags; // Values of NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAGS to use as Cluster Flags + uint32_t triangleCount : 9; // The number of triangles used by the CLAS (max 256) + uint32_t vertexCount : 9; // The number of vertices used by the CLAS (max 256) + uint32_t positionTruncateBitCount : 6; // The number of bits to truncate from the position values + uint32_t indexFormat : 4; // The index format to use for the indexBuffer, see NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT for possible values + uint32_t opacityMicromapIndexFormat : 4; // The index 
format to use for the opacityMicromapIndexBuffer, see NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INDEX_FORMAT for possible values + uint32_t baseGeometryIndexAndFlags; // The base geometry index (lower 24 bit) and base geometry flags (NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAGS), see geometryIndexBuffer + uint16_t indexBufferStride; // The stride of the elements of indexBuffer, in bytes + uint16_t vertexBufferStride; // The stride of the elements of vertexBuffer, in bytes + uint16_t geometryIndexAndFlagsBufferStride; // The stride of the elements of geometryIndexBuffer, in bytes + uint16_t opacityMicromapIndexBufferStride; // The stride of the elements of opacityMicromapIndexBuffer, in bytes + GpuVirtualAddress indexBuffer; // The index buffer to construct the CLAS + GpuVirtualAddress vertexBuffer; // The vertex buffer to construct the CLAS + GpuVirtualAddress geometryIndexAndFlagsBuffer; // (optional) Address of an array of 32bit geometry indices and geometry flags with size equal to the triangle count. 
+ GpuVirtualAddress opacityMicromapArray; // (optional) Address of a valid OMM array, if used NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_ALLOW_OMM must be set on this and all other cluster operation calls interacting with the object(s) constructed + GpuVirtualAddress opacityMicromapIndexBuffer; // (optional) Address of an array of indices into the OMM array + }; + + // Clone of NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_TEMPLATE_ARGS + struct IndirectTriangleTemplateArgs + { + uint32_t clusterId; // The user specified cluster Id to encode in the cluster template + uint32_t clusterFlags; // Values of NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_CLUSTER_FLAGS to use as Cluster Flags + uint32_t triangleCount : 9; // The number of triangles used by the cluster template (max 256) + uint32_t vertexCount : 9; // The number of vertices used by the cluster template (max 256) + uint32_t positionTruncateBitCount : 6; // The number of bits to truncate from the position values + uint32_t indexFormat : 4; // The index format to use for the indexBuffer, must be one of nvrhi::rt::ClusterOperationIndexFormat + uint32_t opacityMicromapIndexFormat : 4; // The index format to use for the opacityMicromapIndexBuffer, see nvrhi::rt::ClusterOperationIndexFormat for possible values + uint32_t baseGeometryIndexAndFlags; // The base geometry index (lower 24 bit) and base geometry flags (NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_GEOMETRY_FLAGS), see geometryIndexBuffer + uint16_t indexBufferStride; // The stride of the elements of indexBuffer, in bytes + uint16_t vertexBufferStride; // The stride of the elements of vertexBuffer, in bytes + uint16_t geometryIndexAndFlagsBufferStride; // The stride of the elements of geometryIndexBuffer, in bytes + uint16_t opacityMicromapIndexBufferStride; // The stride of the elements of opacityMicromapIndexBuffer, in bytes + GpuVirtualAddress indexBuffer; // The index buffer to construct the 
cluster template + GpuVirtualAddress vertexBuffer; // (optional) The vertex buffer to optimize the cluster template, the vertices will not be stored in the cluster template + GpuVirtualAddress geometryIndexAndFlagsBuffer; // (optional) Address of an array of 32bit geometry indices and geometry flags (each 32 bit value organized the same as baseGeometryIndex) with size equal to the triangle count, if non-zero the geometry indices of the CLAS triangles will be equal to the lower 24 bit of geometryIndexBuffer[triangleIndex] + baseGeometryIndex, the geometry flags for each triangle will be the bitwise OR of the flags in the upper 8 bits of baseGeometryIndex and geometryIndexBuffer[triangleIndex] otherwise all triangles will have a geometry index equal to baseGeometryIndex + GpuVirtualAddress opacityMicromapArray; // (optional) Address of a valid OMM array, if used NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_ALLOW_OMM must be set on this and all other cluster operation calls interacting with the object(s) constructed + GpuVirtualAddress opacityMicromapIndexBuffer; // (optional) Address of an array of indices into the OMM array + GpuVirtualAddress instantiationBoundingBoxLimit; // (optional) Pointer to 6 floats with alignment NVAPI_D3D12_RAYTRACING_CLUSTER_TEMPLATE_BOUNDS_BYTE_ALIGNMENT representing the limits of the positions of any vertices the template will ever be instantiated with + }; + + // Clone of NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_INSTANTIATE_TEMPLATE_ARGS + struct IndirectInstantiateTemplateArgs + { + uint32_t clusterIdOffset; // The offset added to the clusterId stored in the Cluster template to calculate the final clusterId that will be written to the instantiated CLAS + uint32_t geometryIndexOffset; // The offset added to the geometry index stored for each triangle in the Cluster template to calculate the final geometry index that will be written to the triangles of the instantiated CLAS, the resulting value may 
not exceed maxGeometryIndexValue both of this call, and the call used to construct the original cluster template referenced + GpuVirtualAddress clusterTemplate; // Address of a previously built cluster template to be instantiated + GpuVirtualAddressAndStride vertexBuffer; // Vertex buffer with stride to use to fetch the vertex positions used for instantiation + }; + + // Clone of NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_CLUSTER_ARGS + struct IndirectArgs + { + uint32_t clusterCount; // The size of the array referenced by clusterVAs + uint32_t reserved; // Reserved, must be 0 + GpuVirtualAddress clusterAddresses; // Address of an array of D3D12_GPU_VIRTUAL_ADDRESS holding valid addresses of CLAS previously constructed + }; + } // namespace cluster + } // namespace rt +} // namespace nvrhi + +#endif // __HLSL_VERSION 2021 +#endif \ No newline at end of file diff --git a/include/nvrhi/utils.h b/include/nvrhi/utils.h index ba9fb47..3b67675 100644 --- a/include/nvrhi/utils.h +++ b/include/nvrhi/utils.h @@ -124,4 +124,24 @@ namespace nvrhi::utils std::mutex m_Mutex; }; + // Automatic begin/end marker for command list + class ScopedMarker + { + public: + ICommandList* m_commandList; + ScopedMarker(ICommandList* commandList, const char* markerName) : m_commandList(commandList) + { + m_commandList->beginMarker(markerName); + } + + ScopedMarker(CommandListHandle* commandList, const char* markerName) : + ScopedMarker(commandList->Get(), markerName) + {} + + ~ScopedMarker() + { + m_commandList->endMarker(); + } + }; + } diff --git a/src/common/state-tracking.cpp b/src/common/state-tracking.cpp index e976bf1..de6045e 100644 --- a/src/common/state-tracking.cpp +++ b/src/common/state-tracking.cpp @@ -135,9 +135,16 @@ namespace nvrhi ResourceStates CommandListResourceStateTracker::getTextureSubresourceState(TextureStateExtension* texture, ArraySlice arraySlice, MipLevel mipLevel) { TextureState* tracking = getTextureStateTracking(texture, false); - if 
(!tracking) - return ResourceStates::Unknown; + { + return texture->descRef.keepInitialState ? + (texture->stateInitialized ? texture->descRef.initialState : ResourceStates::Common) : + ResourceStates::Unknown; + } + + // whole resource + if (tracking->subresourceStates.empty()) + return tracking->state; uint32_t subresource = calcSubresource(mipLevel, arraySlice, texture->descRef); return tracking->subresourceStates[subresource]; diff --git a/src/d3d11/d3d11-backend.h b/src/d3d11/d3d11-backend.h index 33dd665..812c90a 100644 --- a/src/d3d11/d3d11-backend.h +++ b/src/d3d11/d3d11-backend.h @@ -24,6 +24,7 @@ #include #include +#include #include "../common/dxgi-format.h" #include @@ -115,6 +116,7 @@ namespace nvrhi::d3d11 Buffer(const Context& context) : m_Context(context) { } const BufferDesc& getDesc() const override { return desc; } + GpuVirtualAddress getGpuVirtualAddress() const override { nvrhi::utils::NotImplemented(); return 0; } Object getNativeObject(ObjectType objectType) override; ID3D11ShaderResourceView* getSRV(Format format, BufferRange range, ResourceType type); @@ -335,6 +337,7 @@ namespace nvrhi::d3d11 void buildTopLevelAccelStruct(rt::IAccelStruct* as, const rt::InstanceDesc* pInstances, size_t numInstances, rt::AccelStructBuildFlags buildFlags) override; void buildTopLevelAccelStructFromBuffer(rt::IAccelStruct* as, nvrhi::IBuffer* instanceBuffer, uint64_t instanceBufferOffset, size_t numInstances, rt::AccelStructBuildFlags buildFlags = rt::AccelStructBuildFlags::None) override; + void executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc& desc) override; void beginTimerQuery(ITimerQuery* query) override; void endTimerQuery(ITimerQuery* query) override; @@ -498,6 +501,7 @@ namespace nvrhi::d3d11 rt::OpacityMicromapHandle createOpacityMicromap(const rt::OpacityMicromapDesc& desc) override; rt::AccelStructHandle createAccelStruct(const rt::AccelStructDesc& desc) override; MemoryRequirements 
getAccelStructMemoryRequirements(rt::IAccelStruct* as) override; + rt::cluster::OperationSizeInfo getClusterOperationSizeInfo(const rt::cluster::OperationParams& params) override; bool bindAccelStructMemory(rt::IAccelStruct* as, IHeap* heap, uint64_t offset) override; CommandListHandle createCommandList(const CommandListParameters& params = CommandListParameters()) override; diff --git a/src/d3d11/d3d11-commandlist.cpp b/src/d3d11/d3d11-commandlist.cpp index edf5af4..49801cd 100644 --- a/src/d3d11/d3d11-commandlist.cpp +++ b/src/d3d11/d3d11-commandlist.cpp @@ -222,4 +222,9 @@ namespace nvrhi::d3d11 { utils::NotSupported(); } + + void CommandList::executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc&) + { + utils::NotSupported(); + } } // namespace nvrhi::d3d11 diff --git a/src/d3d11/d3d11-device.cpp b/src/d3d11/d3d11-device.cpp index 1a6f7ff..7c1f447 100644 --- a/src/d3d11/d3d11-device.cpp +++ b/src/d3d11/d3d11-device.cpp @@ -292,6 +292,12 @@ namespace nvrhi::d3d11 return MemoryRequirements(); } + rt::cluster::OperationSizeInfo Device::getClusterOperationSizeInfo(const rt::cluster::OperationParams&) + { + utils::NotSupported(); + return rt::cluster::OperationSizeInfo(); + } + bool Device::bindAccelStructMemory(rt::IAccelStruct*, IHeap*, uint64_t) { utils::NotSupported(); diff --git a/src/d3d12/d3d12-backend.h b/src/d3d12/d3d12-backend.h index 368eb34..bf161b3 100644 --- a/src/d3d12/d3d12-backend.h +++ b/src/d3d12/d3d12-backend.h @@ -53,6 +53,19 @@ #define NVRHI_WITH_NVAPI_DISPLACEMENT_MICROMAP (0) #endif +#if NVRHI_D3D12_WITH_NVAPI && defined(NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_VER) +#define NVRHI_WITH_NVAPI_CLUSTERS (1) +#else +#define NVRHI_WITH_NVAPI_CLUSTERS (0) +#endif + +// Line-Swept Spheres were added in NVAPI SDK 572.18 +#if NVRHI_D3D12_WITH_NVAPI && (NVAPI_SDK_VERSION >= 57218) +#define NVRHI_WITH_NVAPI_LSS (1) +#else +#define NVRHI_WITH_NVAPI_LSS (0) +#endif + #include #include #include @@ 
-117,8 +130,10 @@ namespace nvrhi::d3d12 RefCountPtr timerQueryHeap; RefCountPtr timerQueryResolveBuffer; + bool logBufferLifetime = false; IMessageCallback* messageCallback = nullptr; void error(const std::string& message) const; + void info(const std::string& message) const; }; class StaticDescriptorHeap : public IDescriptorHeap @@ -304,6 +319,7 @@ namespace nvrhi::d3d12 ~Buffer() override; const BufferDesc& getDesc() const override { return desc; } + GpuVirtualAddress getGpuVirtualAddress() const override { return gpuVA; } Object getNativeObject(ObjectType objectType) override; @@ -637,6 +653,7 @@ namespace nvrhi::d3d12 const BindingSetDesc* getDesc() const override { return nullptr; } IBindingLayout* getLayout() const override { return nullptr; } uint32_t getCapacity() const override { return capacity; } + uint32_t getFirstDescriptorIndexInHeap() const override { return firstDescriptor; } private: DeviceResources& m_Resources; @@ -957,6 +974,7 @@ namespace nvrhi::d3d12 void buildTopLevelAccelStruct(rt::IAccelStruct* as, const rt::InstanceDesc* pInstances, size_t numInstances, rt::AccelStructBuildFlags buildFlags) override; void buildTopLevelAccelStructFromBuffer(rt::IAccelStruct* as, nvrhi::IBuffer* instanceBuffer, uint64_t instanceBufferOffset, size_t numInstances, rt::AccelStructBuildFlags buildFlags = rt::AccelStructBuildFlags::None) override; + void executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc& desc); void beginTimerQuery(ITimerQuery* query) override; void endTimerQuery(ITimerQuery* query) override; @@ -1153,6 +1171,8 @@ namespace nvrhi::d3d12 rt::OpacityMicromapHandle createOpacityMicromap(const rt::OpacityMicromapDesc& desc) override; rt::AccelStructHandle createAccelStruct(const rt::AccelStructDesc& desc) override; MemoryRequirements getAccelStructMemoryRequirements(rt::IAccelStruct* as) override; + rt::cluster::OperationSizeInfo getClusterOperationSizeInfo(const rt::cluster::OperationParams& params) override; + bool 
bindAccelStructMemory(rt::IAccelStruct* as, IHeap* heap, uint64_t offset) override; nvrhi::CommandListHandle createCommandList(const CommandListParameters& params = CommandListParameters()) override; @@ -1203,11 +1223,16 @@ namespace nvrhi::d3d12 bool m_MeshletsSupported = false; bool m_VariableRateShadingSupported = false; bool m_OpacityMicromapSupported = false; + bool m_RayTracingClustersSupported = false; + bool m_LinearSweptSpheresSupported = false; + bool m_SpheresSupported = false; bool m_ShaderExecutionReorderingSupported = false; bool m_SamplerFeedbackSupported = false; bool m_AftermathEnabled = false; + bool m_HeapDirectlyIndexedEnabled = false; AftermathCrashDumpHelper m_AftermathCrashDumpHelper; + D3D12_FEATURE_DATA_D3D12_OPTIONS m_Options = {}; D3D12_FEATURE_DATA_D3D12_OPTIONS5 m_Options5 = {}; D3D12_FEATURE_DATA_D3D12_OPTIONS6 m_Options6 = {}; diff --git a/src/d3d12/d3d12-buffer.cpp b/src/d3d12/d3d12-buffer.cpp index 7acc32c..d944e8d 100644 --- a/src/d3d12/d3d12-buffer.cpp +++ b/src/d3d12/d3d12-buffer.cpp @@ -45,6 +45,13 @@ namespace nvrhi::d3d12 Buffer::~Buffer() { + if (m_Context.logBufferLifetime) + { + std::stringstream ss; + ss << "Release buffer: " << desc.debugName << " 0x" << std::hex << getGpuVirtualAddress(); + m_Context.info(ss.str()); + } + if (m_ClearUAV != c_InvalidDescriptorIndex) { m_Resources.shaderResourceViewHeap.releaseDescriptor(m_ClearUAV); @@ -191,6 +198,41 @@ namespace nvrhi::d3d12 GFSDK_Aftermath_DX12_RegisterResource(resource, &resourceHandle); #endif } + + if (m_Context.logBufferLifetime) + { + size_t byteDisplay = desc.byteSize; + const char* byteUnit = "B"; + + if (desc.byteSize > (1 << 20)) + { + byteDisplay = desc.byteSize >> 20; + byteUnit = "MB"; + } + else if (desc.byteSize > (1 << 10)) + { + byteDisplay = desc.byteSize >> 10; + byteUnit = "KB"; + } + + std::stringstream ss; + ss << "Create buffer: " << desc.debugName + << " Res:0x" << std::hex << reinterpret_cast(resource.Get()) + << " Gpu:0x" << std::hex << 
getGpuVirtualAddress() << "->0x" << std::hex << getGpuVirtualAddress() + desc.byteSize; + + if (desc.structStride) + { + ss << " (n:" << std::dec << (desc.structStride ? desc.byteSize / desc.structStride : 0) + << " stride:" << std::dec << desc.structStride + << "B size:" << std::dec << byteDisplay << byteUnit << ")"; + } + else + { + ss << " (size:" << std::dec << byteDisplay << byteUnit << ")"; + } + + m_Context.info(ss.str()); + } } DescriptorIndex Buffer::getClearUAV() diff --git a/src/d3d12/d3d12-device.cpp b/src/d3d12/d3d12-device.cpp index 0a08f7d..ad6af18 100644 --- a/src/d3d12/d3d12-device.cpp +++ b/src/d3d12/d3d12-device.cpp @@ -38,6 +38,11 @@ namespace nvrhi::d3d12 messageCallback->message(MessageSeverity::Error, message.c_str()); } + void Context::info(const std::string& message) const + { + messageCallback->message(MessageSeverity::Info, message.c_str()); + } + void WaitForFence(ID3D12Fence* fence, uint64_t value, HANDLE event) { // Test if the fence has been reached @@ -88,6 +93,7 @@ namespace nvrhi::d3d12 : m_Resources(m_Context, desc) { m_Context.device = desc.pDevice; + m_Context.logBufferLifetime = desc.logBufferLifetime; m_Context.messageCallback = desc.errorCB; if (desc.pGraphicsCommandQueue) @@ -197,20 +203,54 @@ namespace nvrhi::d3d12 if (m_NvapiIsInitialized) { NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS caps = NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_NONE; - NvAPI_D3D12_GetRaytracingCaps(m_Context.device5, NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP, &caps, sizeof(NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAPS)); + NvAPI_D3D12_GetRaytracingCaps(m_Context.device5, NVAPI_D3D12_RAYTRACING_CAPS_TYPE_OPACITY_MICROMAP, &caps, sizeof(caps)); m_OpacityMicromapSupported = caps == NVAPI_D3D12_RAYTRACING_OPACITY_MICROMAP_CAP_STANDARD; } +#endif +#endif // #if NVRHI_WITH_NVAPI_OPACITY_MICROMAPS + +#if NVRHI_WITH_NVAPI_CLUSTERS + if (m_NvapiIsInitialized) + { + NVAPI_D3D12_RAYTRACING_CLUSTER_OPERATIONS_CAPS clusterCaps = 
NVAPI_D3D12_RAYTRACING_CLUSTER_OPERATIONS_CAP_NONE; + NvAPI_D3D12_GetRaytracingCaps(m_Context.device5, NVAPI_D3D12_RAYTRACING_CAPS_TYPE_CLUSTER_OPERATIONS, &clusterCaps, sizeof(clusterCaps)); + m_RayTracingClustersSupported = clusterCaps == NVAPI_D3D12_RAYTRACING_CLUSTER_OPERATIONS_CAP_STANDARD; + } +#endif // #if NVRHI_WITH_NVAPI_CLUSTERS - if (m_OpacityMicromapSupported) +#if NVRHI_WITH_NVAPI_LSS + if (m_NvapiIsInitialized) + { + NVAPI_D3D12_RAYTRACING_LINEAR_SWEPT_SPHERES_CAPS lssCaps = NVAPI_D3D12_RAYTRACING_LINEAR_SWEPT_SPHERES_CAP_NONE; + NvAPI_D3D12_GetRaytracingCaps(m_Context.device5, NVAPI_D3D12_RAYTRACING_CAPS_TYPE_LINEAR_SWEPT_SPHERES, &lssCaps, sizeof(NVAPI_D3D12_RAYTRACING_LINEAR_SWEPT_SPHERES_CAPS)); + m_LinearSweptSpheresSupported = lssCaps == NVAPI_D3D12_RAYTRACING_LINEAR_SWEPT_SPHERES_CAP_STANDARD; + + NVAPI_D3D12_RAYTRACING_SPHERES_CAPS spheresCaps = NVAPI_D3D12_RAYTRACING_SPHERES_CAP_NONE; + NvAPI_D3D12_GetRaytracingCaps(m_Context.device5, NVAPI_D3D12_RAYTRACING_CAPS_TYPE_SPHERES, &spheresCaps, sizeof(NVAPI_D3D12_RAYTRACING_SPHERES_CAPS)); + m_SpheresSupported = spheresCaps == NVAPI_D3D12_RAYTRACING_SPHERES_CAP_STANDARD; + } +#endif // #if NVRHI_WITH_NVAPI_LSS + +#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP || NVRHI_WITH_NVAPI_CLUSTERS || NVRHI_WITH_NVAPI_LSS + if (m_OpacityMicromapSupported || m_RayTracingClustersSupported || m_LinearSweptSpheresSupported || m_SpheresSupported) { NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS params = {}; params.version = NVAPI_D3D12_SET_CREATE_PIPELINE_STATE_OPTIONS_PARAMS_VER; - params.flags = NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT; + params.flags = 0; + #if NVRHI_WITH_NVAPI_OPACITY_MICROMAP + params.flags |= (m_OpacityMicromapSupported ? NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_OMM_SUPPORT : 0); + #endif + #if NVRHI_WITH_NVAPI_CLUSTERS + params.flags |= (m_RayTracingClustersSupported ? 
NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_CLUSTER_SUPPORT : 0); + #endif + #if NVRHI_WITH_NVAPI_LSS + params.flags |= (m_LinearSweptSpheresSupported ? NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_LSS_SUPPORT : 0); + params.flags |= (m_SpheresSupported ? NVAPI_D3D12_PIPELINE_CREATION_STATE_FLAGS_ENABLE_SPHERE_SUPPORT : 0); + #endif [[maybe_unused]] NvAPI_Status res = NvAPI_D3D12_SetCreatePipelineStateOptions(m_Context.device5, ¶ms); assert(res == NVAPI_OK); } #endif -#endif // #if NVRHI_WITH_NVAPI_OPACITY_MICROMAPS #endif // #if NVRHI_D3D12_WITH_NVAPI @@ -231,6 +271,15 @@ namespace nvrhi::d3d12 } } #endif + + if (desc.enableHeapDirectlyIndexed) + { + D3D12_FEATURE_DATA_SHADER_MODEL shaderModel = { D3D_SHADER_MODEL_6_6 }; + bool hasShaderModel = SUCCEEDED(m_Context.device->CheckFeatureSupport(D3D12_FEATURE_SHADER_MODEL, &shaderModel, sizeof(shaderModel))); + + m_HeapDirectlyIndexedEnabled = m_Options.ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 && + hasShaderModel && shaderModel.HighestShaderModel >= D3D_SHADER_MODEL_6_6; + } } Device::~Device() @@ -538,12 +587,18 @@ namespace nvrhi::d3d12 return m_RayTracingSupported; case Feature::RayTracingOpacityMicromap: return m_OpacityMicromapSupported; + case Feature::RayTracingClusters: + return m_RayTracingClustersSupported; case Feature::RayQuery: return m_TraceRayInlineSupported; case Feature::FastGeometryShader: return m_FastGeometryShaderSupported; case Feature::ShaderExecutionReordering: return m_ShaderExecutionReorderingSupported; + case Feature::Spheres: + return m_SpheresSupported; + case Feature::LinearSweptSpheres: + return m_LinearSweptSpheresSupported; case Feature::Meshlets: return m_MeshletsSupported; case Feature::VariableRateShading: @@ -568,6 +623,8 @@ namespace nvrhi::d3d12 return true; case Feature::ConstantBufferRanges: return true; + case Feature::HeapDirectlyIndexed: + return m_HeapDirectlyIndexedEnabled; default: return false; } diff --git a/src/d3d12/d3d12-raytracing.cpp 
b/src/d3d12/d3d12-raytracing.cpp index 5a0eef8..71a5d9d 100644 --- a/src/d3d12/d3d12-raytracing.cpp +++ b/src/d3d12/d3d12-raytracing.cpp @@ -38,7 +38,7 @@ namespace { struct RaytracingGeometryDesc { -#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP +#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP || NVRHI_WITH_NVAPI_LSS NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_EX type; #else D3D12_RAYTRACING_GEOMETRY_TYPE type; @@ -55,6 +55,10 @@ namespace // Note: this union member is currently only used to pad the structure so that it's the same size as NVAPI_D3D12_RAYTRACING_GEOMETRY_DESC_EX. // There is no support for Displacement Micro Maps in NVRHI API yet. NVAPI_D3D12_RAYTRACING_GEOMETRY_DMM_TRIANGLES_DESC dmmTriangles; +#endif +#if NVRHI_WITH_NVAPI_LSS + NVAPI_D3D12_RAYTRACING_GEOMETRY_SPHERES_DESC spheres; + NVAPI_D3D12_RAYTRACING_GEOMETRY_LSS_DESC lss; #endif }; } m_data; @@ -126,6 +130,18 @@ namespace m_data.ommTriangles = ommTriangles; } #endif + +#if NVRHI_WITH_NVAPI_LSS + void SetSpheres(const NVAPI_D3D12_RAYTRACING_GEOMETRY_SPHERES_DESC& spheres) { + m_data.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_SPHERES_EX; + m_data.spheres = spheres; + } + + void SetLss(const NVAPI_D3D12_RAYTRACING_GEOMETRY_LSS_DESC& lss) { + m_data.type = NVAPI_D3D12_RAYTRACING_GEOMETRY_TYPE_LSS_EX; + m_data.lss = lss; + } +#endif }; class D3D12BuildRaytracingAccelerationStructureInputs @@ -180,7 +196,7 @@ namespace const T GetAs(); }; -#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP +#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP || NVRHI_WITH_NVAPI_LSS template<> const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX D3D12BuildRaytracingAccelerationStructureInputs::GetAs() { NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX inputs = {}; @@ -361,7 +377,7 @@ namespace nvrhi::d3d12 uint32_t RayTracingPipeline::getShaderTableEntrySize() const { - uint32_t requiredSize = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES + sizeof(UINT64) * maxLocalRootParameters; + uint32_t requiredSize = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES + 
sizeof(uint64_t) * maxLocalRootParameters; return align(requiredSize, uint32_t(D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT)); } @@ -467,6 +483,71 @@ namespace nvrhi::d3d12 outDxrAABB.AABBCount = aabbs.count; } +#if NVRHI_WITH_NVAPI_LSS + static void fillD3dSpheresDesc(NVAPI_D3D12_RAYTRACING_GEOMETRY_SPHERES_DESC& outDxrSpheres, const rt::GeometryDesc& geometryDesc) + { + const auto& spheres = geometryDesc.geometryData.spheres; + + if (spheres.indexBuffer) + outDxrSpheres.indexBuffer.StartAddress = checked_cast(spheres.indexBuffer)->gpuVA + spheres.indexOffset; + else + outDxrSpheres.indexBuffer.StartAddress = 0; + + if (spheres.vertexBuffer) + { + outDxrSpheres.vertexPositionBuffer.StartAddress = checked_cast(spheres.vertexBuffer)->gpuVA + spheres.vertexPositionOffset; + outDxrSpheres.vertexRadiusBuffer.StartAddress = checked_cast(spheres.vertexBuffer)->gpuVA + spheres.vertexRadiusOffset; + } + else + { + outDxrSpheres.vertexPositionBuffer.StartAddress = 0; + outDxrSpheres.vertexRadiusBuffer.StartAddress = 0; + } + + outDxrSpheres.indexBuffer.StrideInBytes = spheres.indexStride; + outDxrSpheres.vertexPositionBuffer.StrideInBytes = spheres.vertexPositionStride; + outDxrSpheres.vertexRadiusBuffer.StrideInBytes = spheres.vertexRadiusStride; + outDxrSpheres.indexFormat = getDxgiFormatMapping(spheres.indexFormat).srvFormat; + outDxrSpheres.vertexPositionFormat = getDxgiFormatMapping(spheres.vertexPositionFormat).srvFormat; + outDxrSpheres.vertexRadiusFormat = getDxgiFormatMapping(spheres.vertexRadiusFormat).srvFormat; + outDxrSpheres.indexCount = spheres.indexCount; + outDxrSpheres.vertexCount = spheres.vertexCount; + } + + static void fillD3dLssDesc(NVAPI_D3D12_RAYTRACING_GEOMETRY_LSS_DESC& outDxrLss, const rt::GeometryDesc& geometryDesc) + { + const auto& lss = geometryDesc.geometryData.lss; + + if (lss.indexBuffer) + outDxrLss.indexBuffer.StartAddress = checked_cast(lss.indexBuffer)->gpuVA + lss.indexOffset; + else + outDxrLss.indexBuffer.StartAddress = 0; + + if 
(lss.vertexBuffer) + { + outDxrLss.vertexPositionBuffer.StartAddress = checked_cast(lss.vertexBuffer)->gpuVA + lss.vertexPositionOffset; + outDxrLss.vertexRadiusBuffer.StartAddress = checked_cast(lss.vertexBuffer)->gpuVA + lss.vertexRadiusOffset; + } + else + { + outDxrLss.vertexPositionBuffer.StartAddress = 0; + outDxrLss.vertexRadiusBuffer.StartAddress = 0; + } + + outDxrLss.indexBuffer.StrideInBytes = lss.indexStride; + outDxrLss.vertexPositionBuffer.StrideInBytes = lss.vertexPositionStride; + outDxrLss.vertexRadiusBuffer.StrideInBytes = lss.vertexRadiusStride; + outDxrLss.indexFormat = getDxgiFormatMapping(lss.indexFormat).srvFormat; + outDxrLss.vertexPositionFormat = getDxgiFormatMapping(lss.vertexPositionFormat).srvFormat; + outDxrLss.vertexRadiusFormat = getDxgiFormatMapping(lss.vertexRadiusFormat).srvFormat; + outDxrLss.indexCount = lss.indexCount; + outDxrLss.primitiveCount = lss.primitiveCount; + outDxrLss.vertexCount = lss.vertexCount; + outDxrLss.primitiveFormat = lss.primitiveFormat == nvrhi::rt::GeometryLssPrimitiveFormat::List ? NVAPI_D3D12_RAYTRACING_LSS_PRIMITIVE_FORMAT_LIST : NVAPI_D3D12_RAYTRACING_LSS_PRIMITIVE_FORMAT_SUCCESSIVE_IMPLICIT; + outDxrLss.endcapMode = lss.endcapMode == nvrhi::rt::GeometryLssEndcapMode::None ? 
NVAPI_D3D12_RAYTRACING_LSS_ENDCAP_MODE_NONE : NVAPI_D3D12_RAYTRACING_LSS_ENDCAP_MODE_CHAINED; + } +#endif + #if NVRHI_WITH_NVAPI_OPACITY_MICROMAP static void fillOmmAttachmentDesc(NVAPI_D3D12_RAYTRACING_GEOMETRY_OMM_ATTACHMENT_DESC& ommAttachment, const rt::GeometryDesc& geometryDesc) { @@ -517,6 +598,20 @@ namespace nvrhi::d3d12 outD3dGeometryDesc.SetTriangles(dxrTriangles); } } +#if NVRHI_WITH_NVAPI_LSS + else if (geometryDesc.geometryType == rt::GeometryType::Spheres) + { + NVAPI_D3D12_RAYTRACING_GEOMETRY_SPHERES_DESC spheres = {}; + fillD3dSpheresDesc(spheres, geometryDesc); + outD3dGeometryDesc.SetSpheres(spheres); + } + else if (geometryDesc.geometryType == rt::GeometryType::Lss) + { + NVAPI_D3D12_RAYTRACING_GEOMETRY_LSS_DESC lss = {}; + fillD3dLssDesc(lss, geometryDesc); + outD3dGeometryDesc.SetLss(lss); + } +#endif else { D3D12_RAYTRACING_GEOMETRY_AABBS_DESC dxrAABBs = {}; @@ -545,7 +640,7 @@ namespace nvrhi::d3d12 const rt::GeometryDesc& srcDesc = desc.bottomLevelGeometries[i]; // useTransform sets a non-null dummy GPU VA. The reason is explained in the spec: // "It (read: GetRaytracingAccelerationStructurePrebuildInfo) may not inspect/dereference - // any GPU virtual addresses, other than to check to see if a pointer is NULL or not, + // any GPU virtual addresses, other than to check to see if a pointer is NULL or not, // such as the optional Transform in D3D12_RAYTRACING_GEOMETRY_TRIANGLES_DESC, without dereferencing it." // Omitting this here will trigger a gpu hang due to incorrect memory calculation. D3D12_GPU_VIRTUAL_ADDRESS transform4x4 = srcDesc.useTransform ? 
16 : 0; @@ -602,7 +697,7 @@ namespace nvrhi::d3d12 D3D12BuildRaytracingAccelerationStructureInputs ASInputs; fillAsInputDescForPreBuildInfo(ASInputs, desc); -#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP +#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP || NVRHI_WITH_NVAPI_LSS if (m_NvapiIsInitialized) { const NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS_EX inputs = ASInputs.GetAs(); @@ -657,7 +752,7 @@ namespace nvrhi::d3d12 as->dataBuffer = checked_cast(buffer.Get()); } - // Sanitize the geometry data to avoid dangling pointers, we don't need these buffers in the Desc + // Sanitize the geometry data to avoid dangling pointers, we don't need these buffers in the desc for (auto& geometry : as->desc.bottomLevelGeometries) { static_assert(offsetof(rt::GeometryTriangles, indexBuffer) @@ -665,7 +760,17 @@ namespace nvrhi::d3d12 static_assert(offsetof(rt::GeometryTriangles, vertexBuffer) == offsetof(rt::GeometryAABBs, unused)); - // Clear only the triangles' data, because the AABBs' data is aliased to triangles (verified above) + static_assert(offsetof(rt::GeometryTriangles, indexBuffer) + == offsetof(rt::GeometrySpheres, indexBuffer)); + static_assert(offsetof(rt::GeometryTriangles, vertexBuffer) + == offsetof(rt::GeometrySpheres, vertexBuffer)); + + static_assert(offsetof(rt::GeometryTriangles, indexBuffer) + == offsetof(rt::GeometryLss, indexBuffer)); + static_assert(offsetof(rt::GeometryTriangles, vertexBuffer) + == offsetof(rt::GeometryLss, vertexBuffer)); + + // Clear only the triangles' data, because the other types' data is aliased to triangles (verified above) geometry.geometryData.triangles.indexBuffer = nullptr; geometry.geometryData.triangles.vertexBuffer = nullptr; } @@ -704,6 +809,227 @@ namespace nvrhi::d3d12 m_Context.device->CreateShaderResourceView(nullptr, &srvDesc, { descriptor }); } + // ------------------------------------------------------------------------------------------------------------------------- + // Ray Tracing Cluster Operations + // 
------------------------------------------------------------------------------------------------------------------------- +#if NVRHI_WITH_NVAPI_CLUSTERS + const char* kClusterOperationTypeStrings[] = { + "Move", + "ClasBuild", + "ClasBuildTemplates", + "ClasInstantiateTemplates", + "BlasBuild" + }; + static_assert(std::size(kClusterOperationTypeStrings) == size_t(nvrhi::rt::cluster::OperationType::BlasBuild) + 1); + + static_assert(NVAPI_D3D12_RAYTRACING_CLAS_BYTE_ALIGNMENT == nvrhi::rt::cluster::kClasByteAlignment); + static_assert(NVAPI_D3D12_RAYTRACING_MAXIMUM_GEOMETRY_INDEX == nvrhi::rt::cluster::kMaxGeometryIndex); + + static_assert(sizeof(NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_CLUSTER_ARGS) == sizeof(nvrhi::rt::cluster::IndirectTriangleClasArgs)); + static_assert(sizeof(NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_TEMPLATE_ARGS) == sizeof(nvrhi::rt::cluster::IndirectTriangleTemplateArgs)); + static_assert(sizeof(NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_INSTANTIATE_TEMPLATE_ARGS) == sizeof(nvrhi::rt::cluster::IndirectInstantiateTemplateArgs)); + + // --- Helpers --- + DEFINE_ENUM_FLAG_OPERATORS(NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAGS); + static NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAGS translateClusterOperationFlags(const rt::cluster::OperationFlags& flags) + { + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAGS result = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_NONE; + + const bool bFastTrace = (flags & rt::cluster::OperationFlags::FastTrace) != 0; + const bool bFastBuild = (flags & rt::cluster::OperationFlags::FastBuild) != 0; + + if (bFastTrace) + { + result |= NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_FAST_TRACE; + } + + if (!bFastTrace && bFastBuild) + { + result |= NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_FAST_BUILD; + } + + if ((flags & 
rt::cluster::OperationFlags::AllowOMM) != 0) + { + result |= NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_ALLOW_OMM; + } + + if ((flags & rt::cluster::OperationFlags::NoOverlap) != 0) + { + result |= NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_FLAG_NO_OVERLAP; + } + + return result; + } + + static NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE translateClusterOperationMode(const rt::cluster::OperationMode& Mode) + { + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE result = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_IMPLICIT_DESTINATIONS; + + switch (Mode) + { + case rt::cluster::OperationMode::ImplicitDestinations: + result = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_IMPLICIT_DESTINATIONS; + break; + + case rt::cluster::OperationMode::ExplicitDestinations: + result = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_EXPLICIT_DESTINATIONS; + break; + + case rt::cluster::OperationMode::GetSizes: + result = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MODE_GET_SIZES; + break; + + default: + assert(false); + break; + } + + return result; + } + + static DXGI_FORMAT translateCLASBuildOperationVertexFormat(const rt::cluster::OperationParams& params) + { + const DxgiFormatMapping& formatMapping = getDxgiFormatMapping(params.clas.vertexFormat); + DXGI_FORMAT nativeFormat = formatMapping.srvFormat; + assert(nativeFormat != DXGI_FORMAT_UNKNOWN); + return nativeFormat; + } + + static uint32_t translateMoveOperation(const rt::cluster::OperationParams& params, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS& inputs) + { + inputs.type = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_MOVE_CLUSTER_OBJECT; + inputs.movesDesc.maxBytesMoved = params.move.maxBytes; + + switch (params.move.type) + { + case rt::cluster::OperationMoveType::BottomLevel: + inputs.movesDesc.type = 
NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE_BOTTOM_LEVEL_ACCELERATION_STRUCTURE; + break; + case rt::cluster::OperationMoveType::ClusterLevel: + inputs.movesDesc.type = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE_CLUSTER_LEVEL_ACCELERATION_STRUCTURE; + break; + case rt::cluster::OperationMoveType::Template: + inputs.movesDesc.type = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_MOVE_TYPE_TEMPLATE; + break; + default: + assert(false); + } + + return sizeof(NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_MOVE_ARGS); + } + + static void translateClusterTriangleDesc(const rt::cluster::OperationParams& params, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUT_TRIANGLES_DESC& TriangleDesc) + { + TriangleDesc.vertexFormat = translateCLASBuildOperationVertexFormat(params); + TriangleDesc.maxGeometryIndexValue = params.clas.maxGeometryIndex; + TriangleDesc.maxUniqueGeometryCountPerArg = params.clas.maxUniqueGeometryCount; + TriangleDesc.maxTriangleCountPerArg = params.clas.maxTriangleCount; + TriangleDesc.maxVertexCountPerArg = params.clas.maxVertexCount; + TriangleDesc.maxTotalTriangleCount = params.clas.maxTotalTriangleCount; + TriangleDesc.maxTotalVertexCount = params.clas.maxTotalVertexCount; + TriangleDesc.minPositionTruncateBitCount = params.clas.minPositionTruncateBitCount; + } + + static uint32_t translateCLASBuildOperation(const rt::cluster::OperationParams& params, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS& inputs) + { + inputs.type = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLAS_FROM_TRIANGLES; + + translateClusterTriangleDesc(params, inputs.trianglesDesc); + + return sizeof(NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_CLUSTER_ARGS); + } + + static uint32_t translateCLASTemplateBuildOperation(const rt::cluster::OperationParams& params, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS& inputs) + { + 
inputs.type = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_CLUSTER_TEMPLATES_FROM_TRIANGLES; + + translateClusterTriangleDesc(params, inputs.trianglesDesc); + + return sizeof(NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_TRIANGLE_TEMPLATE_ARGS); + } + + static uint32_t translateCLASTemplateInstantiateOperation(const rt::cluster::OperationParams& params, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS& inputs) + { + inputs.type = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_INSTANTIATE_CLUSTER_TEMPLATES; + + translateClusterTriangleDesc(params, inputs.trianglesDesc); + + return sizeof(NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_INSTANTIATE_TEMPLATE_ARGS); + } + + static uint32_t translateBLASBuildOperation(const rt::cluster::OperationParams& params, NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS& inputs) + { + inputs.type = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_TYPE_BUILD_BLAS_FROM_CLAS; + inputs.clasDesc.maxTotalClasCount = params.blas.maxTotalClasCount; + inputs.clasDesc.maxClasCountPerArg = params.blas.maxClasPerBlasCount; + + return sizeof(NVAPI_D3D12_RAYTRACING_ACCELERATION_STRUCTURE_MULTI_INDIRECT_CLUSTER_ARGS); + } +#endif // #if NVRHI_WITH_NVAPI_CLUSTERS + + // --- Memory Requirements --- + + // Determines memory requirements for the specified cluster operation + rt::cluster::OperationSizeInfo Device::getClusterOperationSizeInfo(const rt::cluster::OperationParams& params) + { +#if NVRHI_WITH_NVAPI_CLUSTERS + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS inputs = {}; + inputs.maxArgCount = params.maxArgCount; + inputs.mode = translateClusterOperationMode(params.mode); + inputs.flags = translateClusterOperationFlags(params.flags); + + switch (params.type) + { + case rt::cluster::OperationType::Move: + translateMoveOperation(params, inputs); + break; + + case rt::cluster::OperationType::ClasBuild: + 
translateCLASBuildOperation(params, inputs); + break; + + case rt::cluster::OperationType::ClasBuildTemplates: + translateCLASTemplateBuildOperation(params, inputs); + break; + + case rt::cluster::OperationType::ClasInstantiateTemplates: + translateCLASTemplateInstantiateOperation(params, inputs); + break; + + case rt::cluster::OperationType::BlasBuild: + translateBLASBuildOperation(params, inputs); + break; + + default: + assert(false); + break; + } + + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO info = {}; + + NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS d3d12Params; + d3d12Params.version = NVAPI_GET_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_REQUIREMENTS_INFO_PARAMS_VER; + d3d12Params.pInput = &inputs; + d3d12Params.pInfo = &info; + + NvAPI_Status result = NvAPI_D3D12_GetRaytracingMultiIndirectClusterOperationRequirementsInfo(m_Context.device5, &d3d12Params); + if (result != NVAPI_OK) + { + m_Context.error("NvAPI_D3D12_GetRaytracingMultiIndirectClusterOperationRequirementsInfo failed with NvAPI_Status " + std::to_string(result)); + } + + rt::cluster::OperationSizeInfo sizeInfo = {}; + sizeInfo.resultMaxSizeInBytes = align(info.resultDataMaxSizeInBytes, uint64_t(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)); + sizeInfo.scratchSizeInBytes = align(info.scratchDataSizeInBytes, uint64_t(D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)); + return sizeInfo; +#else + (void)params; + utils::NotSupported(); + return rt::cluster::OperationSizeInfo{}; +#endif + } + + bool Device::setHlslExtensionsUAV(uint32_t slot) { #if NVRHI_D3D12_WITH_NVAPI @@ -1346,7 +1672,7 @@ namespace nvrhi::d3d12 if (triangles.ommIndexBuffer) m_Instance->referencedResources.push_back(triangles.ommIndexBuffer); } - else + else if (geometryDesc.geometryType == rt::GeometryType::AABBs) { const auto& aabbs = geometryDesc.geometryData.aabbs; @@ -1357,6 +1683,40 @@ namespace nvrhi::d3d12 
m_Instance->referencedResources.push_back(aabbs.buffer); } +#if NVRHI_WITH_NVAPI_LSS + else if (geometryDesc.geometryType == rt::GeometryType::Spheres) + { + const auto& spheres = geometryDesc.geometryData.spheres; + + if (m_EnableAutomaticBarriers) + { + if (spheres.indexBuffer) + { + requireBufferState(spheres.indexBuffer, ResourceStates::AccelStructBuildInput); + } + requireBufferState(spheres.vertexBuffer, ResourceStates::AccelStructBuildInput); + } + + m_Instance->referencedResources.push_back(spheres.indexBuffer); + m_Instance->referencedResources.push_back(spheres.vertexBuffer); + } + else if (geometryDesc.geometryType == rt::GeometryType::Lss) + { + const auto& lss = geometryDesc.geometryData.lss; + + if (m_EnableAutomaticBarriers) + { + if (lss.indexBuffer) + { + requireBufferState(lss.indexBuffer, ResourceStates::AccelStructBuildInput); + } + requireBufferState(lss.vertexBuffer, ResourceStates::AccelStructBuildInput); + } + + m_Instance->referencedResources.push_back(lss.indexBuffer); + m_Instance->referencedResources.push_back(lss.vertexBuffer); + } +#endif } commitBarriers(); @@ -1458,7 +1818,7 @@ namespace nvrhi::d3d12 } commitBarriers(); -#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP +#if NVRHI_WITH_NVAPI_OPACITY_MICROMAP || NVRHI_WITH_NVAPI_LSS if (checked_cast(m_Device)->GetNvapiIsInitialized()) { NVAPI_D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC_EX buildDesc = {}; @@ -1663,4 +2023,156 @@ namespace nvrhi::d3d12 if (as->desc.trackLiveness) m_Instance->referencedResources.push_back(as); } + + + void CommandList::executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc& desc) + { +#if NVRHI_WITH_NVAPI_CLUSTERS + // Early out: no acceleration structures to build, instantiate, or move + if (desc.params.maxArgCount == 0) return; + + // Validate resource buffers + assert(desc.inIndirectArgsBuffer != nullptr); + assert(desc.scratchSizeInBytes != 0); + + if (desc.params.mode == rt::cluster::OperationMode::ImplicitDestinations) + { + 
assert(desc.inOutAddressesBuffer != nullptr); + assert(desc.outAccelerationStructuresBuffer != nullptr); + } + else if (desc.params.mode == rt::cluster::OperationMode::ExplicitDestinations) + { + assert(desc.inOutAddressesBuffer != nullptr); + } + else if (desc.params.mode == rt::cluster::OperationMode::GetSizes) + { + assert(desc.outSizesBuffer != nullptr); // executeMultiIndirectClusterOperation requires a valid sizes output buffer when in GetSizes mode + } + + uint32_t indirectArgsStride = 0; + + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_INPUTS inputs = {}; + inputs.maxArgCount = desc.params.maxArgCount; + inputs.mode = translateClusterOperationMode(desc.params.mode); + inputs.flags = translateClusterOperationFlags(desc.params.flags); + + switch (desc.params.type) + { + case rt::cluster::OperationType::Move: + indirectArgsStride = translateMoveOperation(desc.params, inputs); + break; + + case rt::cluster::OperationType::ClasBuild: + indirectArgsStride = translateCLASBuildOperation(desc.params, inputs); + break; + + case rt::cluster::OperationType::ClasBuildTemplates: + indirectArgsStride = translateCLASTemplateBuildOperation(desc.params, inputs); + break; + + case rt::cluster::OperationType::ClasInstantiateTemplates: + indirectArgsStride = translateCLASTemplateInstantiateOperation(desc.params, inputs); + break; + + case rt::cluster::OperationType::BlasBuild: + indirectArgsStride = translateBLASBuildOperation(desc.params, inputs); + break; + + default: + assert(false); + break; + } + + // Inputs + Buffer* inIndirectArgCountBuffer = checked_cast(desc.inIndirectArgCountBuffer); + Buffer* inIndirectArgsBuffer = checked_cast(desc.inIndirectArgsBuffer); + + D3D12_GPU_VIRTUAL_ADDRESS scratchGpuVA = 0; + if (!m_DxrScratchManager.suballocateBuffer(desc.scratchSizeInBytes, m_ActiveCommandList->commandList, nullptr, nullptr, nullptr, + &scratchGpuVA, m_RecordingVersion, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BYTE_ALIGNMENT)) + { + const char* 
clusterOperationType = "Unknown"; + if (size_t(desc.params.type) < std::size(kClusterOperationTypeStrings)) + { + clusterOperationType = kClusterOperationTypeStrings[size_t(desc.params.type)]; + } + + std::stringstream ss; + ss << "Couldn't suballocate a scratch buffer for ClusterOperation" << clusterOperationType << ". " + "The operation requires " << desc.scratchSizeInBytes << " bytes of scratch space."; + + m_Context.error(ss.str()); + return; + } + + // Input/Output + Buffer* inOutAddressesBuffer = checked_cast(desc.inOutAddressesBuffer); + + // Outputs + Buffer* outAccelerationStructuresBuffer = checked_cast(desc.outAccelerationStructuresBuffer); + Buffer* outSizesBuffer = checked_cast(desc.outSizesBuffer); + + if (m_EnableAutomaticBarriers) + { + requireBufferState(inIndirectArgsBuffer, ResourceStates::ShaderResource); + if (inIndirectArgCountBuffer) + requireBufferState(inIndirectArgCountBuffer, ResourceStates::ShaderResource); + if (inOutAddressesBuffer) + requireBufferState(inOutAddressesBuffer, ResourceStates::UnorderedAccess); + if (outAccelerationStructuresBuffer) + requireBufferState(outAccelerationStructuresBuffer, ResourceStates::AccelStructWrite); + if (outSizesBuffer) + requireBufferState(outSizesBuffer, ResourceStates::UnorderedAccess); + } + commitBarriers(); + + // Describe the cluster operation + NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_DESC d3d12Desc = {}; + d3d12Desc.inputs = inputs; + + // Address resolution + d3d12Desc.addressResolutionFlags = NVAPI_D3D12_RAYTRACING_MULTI_INDIRECT_CLUSTER_OPERATION_ADDRESS_RESOLUTION_FLAG_NONE; + + // Input Buffers + if (inIndirectArgCountBuffer) + { + d3d12Desc.indirectArgCount = inIndirectArgCountBuffer->gpuVA + desc.inIndirectArgCountOffsetInBytes; + } + d3d12Desc.indirectArgArray.StartAddress = inIndirectArgsBuffer->gpuVA + desc.inIndirectArgsOffsetInBytes; + d3d12Desc.indirectArgArray.StrideInBytes = indirectArgsStride; + d3d12Desc.batchScratchData = scratchGpuVA; + + // Input / Output 
Buffers + if (inOutAddressesBuffer) + { + d3d12Desc.destinationAddressArray.StartAddress = inOutAddressesBuffer->gpuVA + desc.inOutAddressesOffsetInBytes; + d3d12Desc.destinationAddressArray.StrideInBytes = inOutAddressesBuffer->getDesc().structStride; + } + + // Output Buffers + if (outAccelerationStructuresBuffer) + { + d3d12Desc.batchResultData = outAccelerationStructuresBuffer->gpuVA + desc.outAccelerationStructuresOffsetInBytes; + } + if (outSizesBuffer) + { + d3d12Desc.resultSizeArray.StartAddress = outSizesBuffer->gpuVA + desc.outSizesOffsetInBytes; + d3d12Desc.resultSizeArray.StrideInBytes = outSizesBuffer->getDesc().structStride; + } + + NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS clusterOpParams = {}; + clusterOpParams.version = NVAPI_RAYTRACING_EXECUTE_MULTI_INDIRECT_CLUSTER_OPERATION_PARAMS_VER; + clusterOpParams.pDesc = &d3d12Desc; + + // Execute the PTLAS operation + NvAPI_Status result = NvAPI_D3D12_RaytracingExecuteMultiIndirectClusterOperation(m_ActiveCommandList->commandList4, &clusterOpParams); + if (result != NVAPI_OK) + { + m_Context.error("NvAPI_D3D12_RaytracingExecuteMultiIndirectClusterOperation failed with NvAPI_Status " + std::to_string(result)); + } +#else + (void)desc; + utils::NotSupported(); +#endif + } } // namespace nvrhi::d3d12 diff --git a/src/d3d12/d3d12-resource-bindings.cpp b/src/d3d12/d3d12-resource-bindings.cpp index 579d8fd..bdafecb 100644 --- a/src/d3d12/d3d12-resource-bindings.cpp +++ b/src/d3d12/d3d12-resource-bindings.cpp @@ -688,6 +688,12 @@ namespace nvrhi::d3d12 rsDesc.Desc_1_1.Flags |= D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE; } + if (m_HeapDirectlyIndexedEnabled) + { + rsDesc.Desc_1_1.Flags |= D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; + rsDesc.Desc_1_1.Flags |= D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED; + } + if (!rootParameters.empty()) { rsDesc.Desc_1_1.pParameters = rootParameters.data(); diff --git a/src/validation/validation-backend.h 
b/src/validation/validation-backend.h index b38a9e6..101dff9 100644 --- a/src/validation/validation-backend.h +++ b/src/validation/validation-backend.h @@ -188,6 +188,7 @@ namespace nvrhi::validation void buildTopLevelAccelStruct(rt::IAccelStruct* as, const rt::InstanceDesc* pInstances, size_t numInstances, rt::AccelStructBuildFlags buildFlags) override; void buildTopLevelAccelStructFromBuffer(rt::IAccelStruct* as, nvrhi::IBuffer* instanceBuffer, uint64_t instanceBufferOffset, size_t numInstances, rt::AccelStructBuildFlags buildFlags = rt::AccelStructBuildFlags::None) override; + void executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc& desc) override; void beginTimerQuery(ITimerQuery* query) override; void endTimerQuery(ITimerQuery* query) override; @@ -240,6 +241,7 @@ namespace nvrhi::validation bool validateShaderType(ShaderType expected, const ShaderDesc& shaderDesc, const char* function) const; bool validateRenderState(const RenderState& renderState, IFramebuffer* fb) const; + bool validateClusterOperationParams(const rt::cluster::OperationParams& params) const; public: // IResource implementation @@ -316,6 +318,7 @@ namespace nvrhi::validation rt::OpacityMicromapHandle createOpacityMicromap(const rt::OpacityMicromapDesc& desc) override; rt::AccelStructHandle createAccelStruct(const rt::AccelStructDesc& desc) override; MemoryRequirements getAccelStructMemoryRequirements(rt::IAccelStruct* as) override; + rt::cluster::OperationSizeInfo getClusterOperationSizeInfo(const rt::cluster::OperationParams& params) override; bool bindAccelStructMemory(rt::IAccelStruct* as, IHeap* heap, uint64_t offset) override; CommandListHandle createCommandList(const CommandListParameters& params = CommandListParameters()) override; diff --git a/src/validation/validation-commandlist.cpp b/src/validation/validation-commandlist.cpp index 3bd4fee..edd2770 100644 --- a/src/validation/validation-commandlist.cpp +++ b/src/validation/validation-commandlist.cpp @@ -1336,7 
+1336,7 @@ namespace nvrhi::validation return; } } - else // AABBs + else if (geom.geometryType == rt::GeometryType::AABBs) { const auto& aabbs = geom.geometryData.aabbs; @@ -1391,6 +1391,36 @@ namespace nvrhi::validation m_MessageCallback->message(MessageSeverity::Warning, ss.str().c_str()); } } + else if (geom.geometryType == rt::GeometryType::Spheres) + { + const auto& spheres = geom.geometryData.spheres; + + if (spheres.vertexBuffer == nullptr) + { + std::stringstream ss; + ss << "BLAS " << utils::DebugNameToString(as->getDesc().debugName) << " build geometry " << i + << " has NULL vertex buffer"; + error(ss.str()); + return; + } + + // TODO: Add more validation + } + else if (geom.geometryType == rt::GeometryType::Lss) + { + const auto& lss = geom.geometryData.lss; + + if (lss.vertexBuffer == nullptr) + { + std::stringstream ss; + ss << "BLAS " << utils::DebugNameToString(as->getDesc().debugName) << " build geometry " << i + << " has NULL vertex buffer"; + error(ss.str()); + return; + } + + // TODO: Add more validation + } } if ((buildFlags & rt::AccelStructBuildFlags::PerformUpdate) != 0) @@ -1697,6 +1727,68 @@ namespace nvrhi::validation m_CommandList->buildTopLevelAccelStructFromBuffer(underlyingAS, instanceBuffer, instanceBufferOffset, numInstances, buildFlags); } + void CommandListWrapper::executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc& desc) + { + if (!requireOpenState()) + return; + + if (!requireType(CommandQueue::Compute, "executeMultiIndirectClusterOperation")) + return; + + if (!m_Device->validateClusterOperationParams(desc.params)) + return; + + if (desc.inIndirectArgCountBuffer == nullptr && desc.params.maxArgCount == 0) + { + error("executeMultiIndirectClusterOperation: 'inIndirectArgCountBuffer' is NULL and maxArgCount is 0"); + return; + } + + if (desc.inIndirectArgsBuffer == nullptr) + { + error("executeMultiIndirectClusterOperation: 'inIndirectArgsBuffer' is NULL"); + return; + } + + if (desc.scratchSizeInBytes == 0) + 
{ + error("executeMultiIndirectClusterOperation: 'scratchSizeInBytes' is 0"); + return; + } + + if (desc.params.mode == rt::cluster::OperationMode::ImplicitDestinations) + { + if (desc.inOutAddressesBuffer == nullptr) + { + error("executeMultiIndirectClusterOperation (cluster::OperationMode::ImplicitDestinations): 'inOutAddressesBuffer' is NULL"); + return; + } + if (desc.outAccelerationStructuresBuffer == nullptr) + { + error("executeMultiIndirectClusterOperation (cluster::OperationMode::ImplicitDestinations): 'outAccelerationStructuresBuffer' is NULL"); + return; + } + } + else if (desc.params.mode == rt::cluster::OperationMode::ExplicitDestinations) + { + if (desc.inOutAddressesBuffer == nullptr) + { + error("executeMultiIndirectClusterOperation (cluster::OperationMode::ExplicitDestinations): 'inOutAddressesBuffer' is NULL"); + return; + } + } + else if (desc.params.mode == rt::cluster::OperationMode::GetSizes) + { + if (desc.outSizesBuffer == nullptr) + { + error("executeMultiIndirectClusterOperation (cluster::OperationMode::GetSizes): 'outSizesBuffer' is NULL"); + return; + } + } + + m_CommandList->executeMultiIndirectClusterOperation(desc); + } + void CommandListWrapper::evaluatePushConstantSize(const nvrhi::BindingLayoutVector& bindingLayouts) { m_PipelinePushConstantSize = 0; diff --git a/src/validation/validation-device.cpp b/src/validation/validation-device.cpp index bf177d6..65fbd6f 100644 --- a/src/validation/validation-device.cpp +++ b/src/validation/validation-device.cpp @@ -1763,6 +1763,133 @@ namespace nvrhi::validation return memReq; } + static const char* kOperationTypeStrings[] = + { + "Move", + "ClasBuild", + "ClasBuildTemplates", + "ClasInstantiateTemplates", + "BlasBuild" + }; + static_assert(std::size(kOperationTypeStrings) == uint32_t(rt::cluster::OperationType::BlasBuild) + 1); + + + bool DeviceWrapper::validateClusterOperationParams(const rt::cluster::OperationParams& params) const + { + bool isValid = true; + + const char* operationType = 
"Unknown"; + if (uint32_t(params.type) < std::size(kOperationTypeStrings)) + { + operationType = kOperationTypeStrings[uint32_t(params.type)]; + } + + switch (params.mode) + { + case rt::cluster::OperationMode::ImplicitDestinations: + case rt::cluster::OperationMode::ExplicitDestinations: + case rt::cluster::OperationMode::GetSizes: + break; + default: + isValid = false; + error("cluster::OperationParams " + std::string(operationType) + " unknown cluster::OperationMode"); + break; + } + + bool validateClasParams = false; + switch (params.type) + { + case rt::cluster::OperationType::Move: + break; + case rt::cluster::OperationType::ClasBuild: + case rt::cluster::OperationType::ClasBuildTemplates: + case rt::cluster::OperationType::ClasInstantiateTemplates: + validateClasParams = true; + break; + case rt::cluster::OperationType::BlasBuild: + break; + } + + if (validateClasParams) + { + nvrhi::Format vertexFormat = params.clas.vertexFormat; + const bool validVertexFormat = + (vertexFormat == nvrhi::Format::RGBA32_FLOAT) + || (vertexFormat == nvrhi::Format::RGB32_FLOAT) + || (vertexFormat == nvrhi::Format::RG32_FLOAT) + || (vertexFormat == nvrhi::Format::RGBA16_FLOAT) + || (vertexFormat == nvrhi::Format::RG16_FLOAT) + || (vertexFormat == nvrhi::Format::RGBA16_SNORM) + || (vertexFormat == nvrhi::Format::RG16_SNORM) + || (vertexFormat == nvrhi::Format::RGBA8_SNORM) + || (vertexFormat == nvrhi::Format::RG8_SNORM) + || (vertexFormat == nvrhi::Format::RGBA16_UNORM) + || (vertexFormat == nvrhi::Format::RG16_UNORM) + || (vertexFormat == nvrhi::Format::RGBA8_UNORM) + || (vertexFormat == nvrhi::Format::RG8_UNORM) + || (vertexFormat == nvrhi::Format::R10G10B10A2_UNORM); + if (!validVertexFormat) + { + error("cluster::OperationParams " + std::string(operationType) + " does not have a valid vertex format"); + isValid = false; + } + + if (params.clas.maxGeometryIndex > nvrhi::rt::cluster::kMaxGeometryIndex) + { + error("cluster::OperationParams " + std::string(operationType) + " 
has a maxGeometryIndex over " + std::to_string(nvrhi::rt::cluster::kMaxGeometryIndex)); + isValid = false; + } + + if (params.clas.minPositionTruncateBitCount > 32) + { + error("cluster::OperationParams " + std::string(operationType) + " minPositionTruncateBitCount over " + std::to_string(32)); + isValid = false; + } + + if (params.clas.maxTriangleCount > nvrhi::rt::cluster::kClasMaxTriangles) + { + error("cluster::OperationParams " + std::string(operationType) + " maxTriangleCount over " + std::to_string(nvrhi::rt::cluster::kClasMaxTriangles)); + isValid = false; + } + + if (params.clas.maxVertexCount > nvrhi::rt::cluster::kClasMaxVertices) + { + error("cluster::OperationParams " + std::string(operationType) + " maxVertexCount over " + std::to_string(nvrhi::rt::cluster::kClasMaxVertices)); + isValid = false; + } + + if (params.clas.maxTriangleCount > params.clas.maxTotalTriangleCount) + { + error("cluster::OperationParams " + std::string(operationType) + " maxTriangleCount over maxTotalTriangleCount. maxTotalTriangleCount must be greater than the sum of all triangles in the operation"); + isValid = false; + } + + if (params.clas.maxVertexCount > params.clas.maxTotalVertexCount) + { + error("cluster::OperationParams " + std::string(operationType) + " maxVertexCount over maxTotalVertexCount. maxTotalVertexCount must be greater than the sum of all vertices in the operation"); + isValid = false; + } + + if (params.clas.maxUniqueGeometryCount > params.clas.maxTriangleCount) + { + error("cluster::OperationParams " + std::string(operationType) + " maxUniqueGeometryCount over maxTriangleCount. 
Maximum 1 geometry per triangle"); + isValid = false; + } + } + + return isValid; + } + + rt::cluster::OperationSizeInfo DeviceWrapper::getClusterOperationSizeInfo(const rt::cluster::OperationParams& params) + { + if (!validateClusterOperationParams(params)) + { + return rt::cluster::OperationSizeInfo{}; + } + + return m_Device->getClusterOperationSizeInfo(params); + } + bool DeviceWrapper::bindAccelStructMemory(rt::IAccelStruct* as, IHeap* heap, uint64_t offset) { if (as == nullptr) diff --git a/src/vulkan/vulkan-backend.h b/src/vulkan/vulkan-backend.h index 64f2612..3b94c5d 100644 --- a/src/vulkan/vulkan-backend.h +++ b/src/vulkan/vulkan-backend.h @@ -569,6 +569,7 @@ namespace nvrhi::vulkan ~Buffer() override; const BufferDesc& getDesc() const override { return desc; } + GpuVirtualAddress getGpuVirtualAddress() const override { return deviceAddress; } Object getNativeObject(ObjectType type) override; private: @@ -810,6 +811,9 @@ namespace nvrhi::vulkan const BindingSetDesc* getDesc() const override { return nullptr; } IBindingLayout* getLayout() const override { return layout; } uint32_t getCapacity() const override { return capacity; } + + // Vulkan doesnt not have a concept of the first descriptor in the heap + uint32_t getFirstDescriptorIndexInHeap() const override { return 0; } Object getNativeObject(ObjectType objectType) override; private: @@ -1136,6 +1140,7 @@ namespace nvrhi::vulkan rt::OpacityMicromapHandle createOpacityMicromap(const rt::OpacityMicromapDesc& desc) override; rt::AccelStructHandle createAccelStruct(const rt::AccelStructDesc& desc) override; MemoryRequirements getAccelStructMemoryRequirements(rt::IAccelStruct* as) override; + rt::cluster::OperationSizeInfo getClusterOperationSizeInfo(const rt::cluster::OperationParams& params) override; bool bindAccelStructMemory(rt::IAccelStruct* as, IHeap* heap, uint64_t offset) override; CommandListHandle createCommandList(const CommandListParameters& params = CommandListParameters()) override; @@ 
-1233,6 +1238,7 @@ namespace nvrhi::vulkan void buildTopLevelAccelStruct(rt::IAccelStruct* as, const rt::InstanceDesc* pInstances, size_t numInstances, rt::AccelStructBuildFlags buildFlags) override; void buildTopLevelAccelStructFromBuffer(rt::IAccelStruct* as, nvrhi::IBuffer* instanceBuffer, uint64_t instanceBufferOffset, size_t numInstances, rt::AccelStructBuildFlags buildFlags = rt::AccelStructBuildFlags::None) override; + void executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc& desc) override; void beginTimerQuery(ITimerQuery* query) override; void endTimerQuery(ITimerQuery* query) override; diff --git a/src/vulkan/vulkan-raytracing.cpp b/src/vulkan/vulkan-raytracing.cpp index 23ef859..7feed78 100644 --- a/src/vulkan/vulkan-raytracing.cpp +++ b/src/vulkan/vulkan-raytracing.cpp @@ -291,7 +291,17 @@ namespace nvrhi::vulkan static_assert(offsetof(rt::GeometryTriangles, vertexBuffer) == offsetof(rt::GeometryAABBs, unused)); - // Clear only the triangles' data, because the AABBs' data is aliased to triangles (verified above) + static_assert(offsetof(rt::GeometryTriangles, indexBuffer) + == offsetof(rt::GeometrySpheres, indexBuffer)); + static_assert(offsetof(rt::GeometryTriangles, vertexBuffer) + == offsetof(rt::GeometrySpheres, vertexBuffer)); + + static_assert(offsetof(rt::GeometryTriangles, indexBuffer) + == offsetof(rt::GeometryLss, indexBuffer)); + static_assert(offsetof(rt::GeometryTriangles, vertexBuffer) + == offsetof(rt::GeometryLss, vertexBuffer)); + + // Clear only the triangles' data, because the other types' data is aliased to triangles (verified above) geometry.geometryData.triangles.indexBuffer = nullptr; geometry.geometryData.triangles.vertexBuffer = nullptr; } @@ -309,6 +319,12 @@ namespace nvrhi::vulkan return MemoryRequirements(); } + rt::cluster::OperationSizeInfo Device::getClusterOperationSizeInfo(const rt::cluster::OperationParams&) + { + utils::NotSupported(); + return rt::cluster::OperationSizeInfo(); + } + bool 
Device::bindAccelStructMemory(rt::IAccelStruct* _as, IHeap* heap, uint64_t offset) { AccelStruct* as = checked_cast(_as); @@ -739,6 +755,11 @@ namespace nvrhi::vulkan m_CurrentCmdBuf->referencedResources.push_back(as); } + void CommandList::executeMultiIndirectClusterOperation(const rt::cluster::OperationDesc&) + { + utils::NotSupported(); + } + AccelStruct::~AccelStruct() { #ifdef NVRHI_WITH_RTXMU diff --git a/src/vulkan/vulkan-texture.cpp b/src/vulkan/vulkan-texture.cpp index 45ba50e..71163fe 100644 --- a/src/vulkan/vulkan-texture.cpp +++ b/src/vulkan/vulkan-texture.cpp @@ -679,6 +679,8 @@ namespace nvrhi::vulkan return Object(memory); case ObjectTypes::SharedHandle: return Object(sharedHandle); + case ObjectTypes::VK_ImageCreateInfo: + return Object(&imageInfo); default: return nullptr; } diff --git a/thirdparty/Vulkan-Headers b/thirdparty/Vulkan-Headers index 36872f9..39f924b 160000 --- a/thirdparty/Vulkan-Headers +++ b/thirdparty/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 36872f9062b17b1a30b8ed1d81ca5ea6bb608a72 +Subproject commit 39f924b810e561fd86b2558b6711ca68d4363f68