diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index eae48b2a..99450c50 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -52,6 +52,7 @@ jobs:
           if not exist ../out/pl_image_ext.dll exit 1
           if not exist ../out/pl_stats_ext.dll exit 1
           if not exist ../out/pl_debug_ext.dll exit 1
+          if not exist ../out/pl_gpu_allocators_ext.dll exit 1
          if not exist ../out/pl_resource_ext.dll exit 1
           if not exist ../out/pl_ecs_ext.dll exit 1
           if not exist ../out/pl_ref_renderer_ext.dll exit 1
@@ -102,6 +103,7 @@ jobs:
           test -f ./out/pl_image_ext.dylib || exit 1
           test -f ./out/pl_debug_ext.dylib || exit 1
           test -f ./out/pl_graphics_ext.dylib || exit 1
+          test -f ./out/pl_gpu_allocators_ext.dylib || exit 1
           test -f ./out/pl_resource_ext.dylib || exit 1
           test -f ./out/pl_ecs_ext.dylib || exit 1
           test -f ./out/pl_ref_renderer_ext.dylib || exit 1
@@ -165,6 +167,7 @@ jobs:
           test -f ./out/pl_image_ext.so || exit 1
           test -f ./out/pl_stats_ext.so || exit 1
           test -f ./out/pl_debug_ext.so || exit 1
+          test -f ./out/pl_gpu_allocators_ext.so || exit 1
           test -f ./out/pl_resource_ext.so || exit 1
           test -f ./out/pl_ecs_ext.so || exit 1
           test -f ./out/pl_ref_renderer_ext.so || exit 1
diff --git a/extensions/pl_debug_ext.c b/extensions/pl_debug_ext.c
index 7423c420..9ccb19e0 100644
--- a/extensions/pl_debug_ext.c
+++ b/extensions/pl_debug_ext.c
@@ -35,15 +35,17 @@ Index of this file:
 #include "pl_ui_internal.h"
 #include "pl_stats_ext.h"
 #include "pl_graphics_ext.h"
+#include "pl_gpu_allocators_ext.h"
 
 //-----------------------------------------------------------------------------
 // [SECTION] global data
 //-----------------------------------------------------------------------------
 
 // apis
-static const plApiRegistryI*  gptApiRegistry = NULL;
-static const plStatsI*        ptStatsApi     = NULL;
-static const plDataRegistryI* ptDataRegistry = NULL;
+static const plApiRegistryI*   gptApiRegistry   = NULL;
+static const plStatsI*         ptStatsApi       = NULL;
+static const plDataRegistryI*  ptDataRegistry   = NULL;
+static const plGPUAllocatorsI* gptGpuAllocators = NULL;
 
 // contexts
 static plMemoryContext* ptMemoryCtx = NULL;
@@ -803,10 +805,10 @@ pl__show_device_memory(bool* bValue)
     pl_text("Host Memory: %llu bytes", (double)ptDevice->ptGraphics->szHostMemoryInUse);
 
     const plDeviceMemoryAllocatorI atAllocators[] = {
-        ptDevice->tLocalBuddyAllocator,
-        ptDevice->tLocalDedicatedAllocator,
-        ptDevice->tStagingUnCachedAllocator,
-        ptDevice->tStagingCachedAllocator
+        *gptGpuAllocators->create_local_buddy_allocator(ptDevice),
+        *gptGpuAllocators->create_local_dedicated_allocator(ptDevice),
+        *gptGpuAllocators->create_staging_uncached_allocator(ptDevice),
+        *gptGpuAllocators->create_staging_uncached_allocator(ptDevice)
     };
 
     const char* apcAllocatorNames[] = {
@@ -1072,6 +1074,7 @@ pl_load_ext(plApiRegistryI* ptApiRegistry, bool bReload)
     pl_set_context(ptDataRegistry->get_data("ui"));
 
     ptStatsApi = ptApiRegistry->first(PL_API_STATS);
+    gptGpuAllocators = ptApiRegistry->first(PL_API_GPU_ALLOCATORS);
     ptIOCtx = pl_get_io();
 
     if(bReload)
diff --git a/extensions/pl_gpu_allocators_ext.c b/extensions/pl_gpu_allocators_ext.c
new file mode 100644
index 00000000..cccb4598
--- /dev/null
+++ b/extensions/pl_gpu_allocators_ext.c
@@ -0,0 +1,729 @@
+/*
+   pl_gpu_allocators_ext.c
+*/
+
+/*
+Index of this file:
+// [SECTION] includes
+// [SECTION] internal api implementation
+// [SECTION] public api implementation
+// [SECTION] extension loading
+// [SECTION] unity build
+*/
+
+//-----------------------------------------------------------------------------
+// [SECTION] includes
+//-----------------------------------------------------------------------------
+
+#include "pilotlight.h"
+#include "pl_gpu_allocators_ext.h"
+#include "pl_graphics_ext.h"
+#define PL_MATH_INCLUDE_FUNCTIONS
+#include "pl_math.h"
+
+static const plDeviceI*   gptDevice = NULL;
+static const plGraphicsI* gptGfx    = NULL;
+
+//-----------------------------------------------------------------------------
+// [SECTION] internal api
+//-----------------------------------------------------------------------------
+
+typedef struct _plDeviceAllocatorData
+{
+    plDeviceMemoryAllocatorI* ptAllocator;
+    plDevice*                 ptDevice;
+    plDeviceAllocationBlock*  sbtBlocks;
+    uint32_t*                 sbtFreeBlockIndices;
+
+    // buddy allocator data
+    plDeviceAllocationRange* sbtNodes;
+    uint32_t                 auFreeList[PL_DEVICE_LOCAL_LEVELS];
+} plDeviceAllocatorData;
+
+static plDeviceAllocationBlock*
+pl_get_allocator_blocks(struct plDeviceMemoryAllocatorO* ptInst, uint32_t* puSizeOut)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+
+    if(puSizeOut)
+    {
+        *puSizeOut = pl_sb_size(ptData->sbtBlocks);
+    }
+    return ptData->sbtBlocks;
+}
+
+static plDeviceAllocationRange*
+pl_get_allocator_ranges(struct plDeviceMemoryAllocatorO* ptInst, uint32_t* puSizeOut)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+
+    if(puSizeOut)
+    {
+        *puSizeOut = pl_sb_size(ptData->sbtNodes);
+    }
+    return ptData->sbtNodes;
+}
+
+static void
+pl__add_node_to_freelist(plDeviceAllocatorData* ptData, uint32_t uLevel, uint32_t uNode)
+{
+    plDeviceAllocationRange* ptNode = &ptData->sbtNodes[uNode];
+    ptNode->ulUsedSize = 0;
+    ptData->sbtNodes[uNode].uNextNode = ptData->auFreeList[uLevel];
+    ptData->auFreeList[uLevel] = uNode;
+}
+
+static void
+pl__remove_node_from_freelist(plDeviceAllocatorData* ptData, uint32_t uLevel, uint32_t uNode)
+{
+
+    bool bFound = false;
+    if(ptData->auFreeList[uLevel] == uNode)
+    {
+        ptData->auFreeList[uLevel] = ptData->sbtNodes[uNode].uNextNode;
+        bFound = true;
+    }
+    else
+    {
+        uint32_t uNextNode = ptData->auFreeList[uLevel];
+        while(uNextNode != UINT32_MAX)
+        {
+            uint32_t uPrevNode = uNextNode;
+            uNextNode = ptData->sbtNodes[uPrevNode].uNextNode;
+
+            if(uNextNode == uNode)
+            {
+                ptData->sbtNodes[uPrevNode].uNextNode = ptData->sbtNodes[uNode].uNextNode;
+                bFound = true;
+                break;
+            }
+        }
+    }
+
+    plDeviceAllocationRange* ptNode = &ptData->sbtNodes[uNode];
+    ptNode->ulUsedSize = UINT64_MAX; // ignored
+    ptNode->uNextNode = UINT32_MAX;
+    PL_ASSERT(bFound && "could not find node to remove");
+}
+
+static uint32_t
+pl__create_device_node(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uMemoryType)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+    uint32_t uNode = UINT32_MAX;
+
+    plDeviceAllocationBlock tBlock = {
+        .ulAddress    = 0,
+        .ulSize       = PL_DEVICE_BUDDY_BLOCK_SIZE,
+        .ulMemoryType = uMemoryType
+    };
+
+    uNode = pl_sb_size(ptData->sbtNodes);
+    uint32_t uNodeIndex = uNode;
+    pl_sb_resize(ptData->sbtNodes, pl_sb_size(ptData->sbtNodes) + (1 << PL_DEVICE_LOCAL_LEVELS) - 1);
+    const uint32_t uBlockIndex = pl_sb_size(ptData->sbtBlocks);
+    for(uint32_t uLevelIndex = 0; uLevelIndex < PL_DEVICE_LOCAL_LEVELS; uLevelIndex++)
+    {
+        const uint64_t uSizeOfLevel = PL_DEVICE_BUDDY_BLOCK_SIZE / ((uint64_t)1 << (uint64_t)uLevelIndex);
+        const uint32_t uLevelBlockCount = (1 << uLevelIndex);
+        uint64_t uCurrentOffset = 0;
+        for(uint32_t i = 0; i < uLevelBlockCount; i++)
+        {
+            ptData->sbtNodes[uNodeIndex].uNodeIndex = uNodeIndex;
+            ptData->sbtNodes[uNodeIndex].uNextNode = UINT32_MAX;
+            ptData->sbtNodes[uNodeIndex].ulOffset = uCurrentOffset;
+            ptData->sbtNodes[uNodeIndex].ulTotalSize = uSizeOfLevel;
+            ptData->sbtNodes[uNodeIndex].ulBlockIndex = uBlockIndex;
+            strncpy(ptData->sbtNodes[uNodeIndex].acName, "not used", PL_MAX_NAME_LENGTH);
+            uCurrentOffset += uSizeOfLevel;
+            uNodeIndex++;
+        }
+    }
+    pl_sb_push(ptData->sbtBlocks, tBlock);
+    return uNode;
+}
+
+static uint32_t
+pl__get_device_node(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uLevel, uint32_t uMemoryType)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+    uint32_t uNode = UINT32_MAX;
+
+    if(uLevel == 0)
+    {
+        if(ptData->auFreeList[0] == UINT32_MAX) // no nodes available
+        {
+            uNode = pl__create_device_node(ptInst, uMemoryType);
+            pl__add_node_to_freelist(ptData, 0, uNode);
+        }
+        else // nodes available
+        {
+            // look for block with correct memory type
+            uint32_t uNextNode = ptData->auFreeList[0];
+            while(uNextNode != UINT32_MAX)
+            {
+                if(ptData->sbtBlocks[ptData->sbtNodes[uNextNode].ulBlockIndex].ulMemoryType == (uint64_t)uMemoryType)
+                {
+                    uNode = uNextNode;
+                    break;
+                }
+                uNextNode = ptData->sbtNodes[uNextNode].uNextNode;
+            }
+
+            if(uNode == UINT32_MAX) // could not find block with correct memory type
+            {
+                uNode = pl__create_device_node(ptInst, uMemoryType);
+                pl__add_node_to_freelist(ptData, 0, uNode);
+            }
+        }
+    }
+    else if(ptData->auFreeList[uLevel] == UINT32_MAX) // no nodes available at the required level
+    {
+        // get bigger block and split it and return left block
+        uint32_t uParentNode = pl__get_device_node(ptInst, uLevel - 1, uMemoryType);
+        plDeviceAllocationRange* ptParentNode = &ptData->sbtNodes[uParentNode];
+        ptParentNode->ulUsedSize = UINT64_MAX; // ignore
+
+        const uint64_t uSizeOfLevel = PL_DEVICE_BUDDY_BLOCK_SIZE / ((uint64_t)1 << (uint64_t)(uLevel - 1));
+        const uint32_t uLevelBlockCount = (1 << (uLevel - 1));
+        uint32_t uIndexInLevel = (uint32_t)(ptParentNode->ulOffset / uSizeOfLevel);
+
+        const uint32_t uLeftIndex = uParentNode + uLevelBlockCount + uIndexInLevel;
+        const uint32_t uRightIndex = uParentNode + uLevelBlockCount + uIndexInLevel + 1;
+
+        pl__add_node_to_freelist(ptData, uLevel, uLeftIndex);
+        pl__add_node_to_freelist(ptData, uLevel, uRightIndex);
+
+        uNode = uLeftIndex;
+    }
+    else // nodes available at required level
+    {
+        // look for block with correct memory type
+        uint32_t uNextNode = ptData->auFreeList[uLevel];
+        while(uNextNode != UINT32_MAX)
+        {
+            const uint64_t ulBlockIndex = ptData->sbtNodes[uNextNode].ulBlockIndex;
+            if(ptData->sbtBlocks[ulBlockIndex].ulMemoryType == (uint64_t)uMemoryType)
+            {
+                uNode = uNextNode;
+                break;
+            }
+            uNextNode = ptData->sbtNodes[uNextNode].uNextNode;
+        }
+
+        if(uNode == UINT32_MAX) // could not find block with correct memory type
+        {
+            uint32_t uParentNode = pl__get_device_node(ptInst, uLevel - 1, uMemoryType);
+            plDeviceAllocationRange* ptParentNode = &ptData->sbtNodes[uParentNode];
+
+            const uint64_t uSizeOfLevel = PL_DEVICE_BUDDY_BLOCK_SIZE / ((uint64_t)1 << (uint64_t)(uLevel - 1));
+            const uint32_t uLevelBlockCount = (1 << (uLevel - 1));
+            uint32_t uIndexInLevel = (uint32_t)(ptParentNode->ulOffset / uSizeOfLevel);
+
+            const uint32_t uLeftIndex = uParentNode + uLevelBlockCount + uIndexInLevel;
+            const uint32_t uRightIndex = uParentNode + uLevelBlockCount + uIndexInLevel + 1;
+
+            pl__add_node_to_freelist(ptData, uLevel, uLeftIndex);
+            pl__add_node_to_freelist(ptData, uLevel, uRightIndex);
+            uNode = uLeftIndex;
+        }
+    }
+
+    pl__remove_node_from_freelist(ptData, uLevel, uNode);
+    return uNode;
+}
+
+static inline bool
+pl__is_node_free(plDeviceAllocatorData* ptData, uint32_t uNode)
+{
+
+    // find what level we need
+    uint32_t uLevel = 0;
+    for(; uLevel < PL_DEVICE_LOCAL_LEVELS; uLevel++)
+    {
+        const uint64_t uLevelSize = PL_DEVICE_BUDDY_BLOCK_SIZE / (1 << uLevel);
+        if(uLevelSize == ptData->sbtNodes[uNode].ulTotalSize)
+        {
+            break;
+        }
+    }
+    uLevel = pl_minu(uLevel, PL_DEVICE_LOCAL_LEVELS - 1);
+
+    // check if node is in freelist
+    bool bInFreeList = false;
+    uint32_t uNextNode = ptData->auFreeList[uLevel];
+    while(uNextNode != UINT32_MAX)
+    {
+
+        if(uNextNode == ptData->sbtNodes[uNextNode].uNextNode)
+            break;
+
+        if(uNextNode == uNode)
+        {
+            bInFreeList = true;
+            break;
+        }
+        uNextNode = ptData->sbtNodes[uNextNode].uNextNode;
+    }
+
+    const bool bFree = ptData->sbtNodes[uNode].ulUsedSize == 0;
+    if(bFree)
+    {
+        PL_ASSERT(bInFreeList && "free item was not in list");
+    }
+    return bFree;
+}
+
+static void
+pl__coalesce_nodes(plDeviceAllocatorData* ptData, uint32_t uLevel, uint32_t uNode)
+{
+    plDeviceAllocationRange* ptNode = &ptData->sbtNodes[uNode];
+
+    // just return node to freelist
+    if(uLevel == 0)
+    {
+        pl__add_node_to_freelist(ptData, uLevel, uNode);
+        return;
+    }
+
+    bool bBothFree = false;
+    uint32_t uLeftNode = uNode;
+    uint32_t uRightNode = uNode + 1;
+
+    if(ptNode->ulBlockIndex % 2 == 0)
+    {
+        if(uNode % 2 == 1) // left node
+        {
+            if(pl__is_node_free(ptData, uRightNode))
+            {
+
+                bBothFree = true;
+                pl__remove_node_from_freelist(ptData, uLevel, uRightNode);
+            }
+        }
+        else
+        {
+            uLeftNode = uNode - 1;
+            uRightNode = uNode;
+            if(pl__is_node_free(ptData, uLeftNode))
+            {
+                bBothFree = true;
+                pl__remove_node_from_freelist(ptData, uLevel, uLeftNode);
+            }
+        }
+    }
+    else
+    {
+        if(uNode % 2 == 1) // right node
+        {
+            if(pl__is_node_free(ptData, uLeftNode))
+            {
+                bBothFree = true;
+                pl__remove_node_from_freelist(ptData, uLevel, uLeftNode);
+            }
+        }
+        else
+        {
+            if(pl__is_node_free(ptData, uRightNode))
+            {
+                bBothFree = true;
+                pl__remove_node_from_freelist(ptData, uLevel, uRightNode);
+            }
+        }
+    }
+
+    if(bBothFree) // need to coalesce
+    {
+
+        if(uLevel > 1)
+        {
+            // find parent node
+            const uint64_t uSizeOfParentLevel = PL_DEVICE_BUDDY_BLOCK_SIZE / ((uint64_t)1 << (uint64_t)(uLevel - 1));
+            const uint32_t uParentLevelBlockCount = (1 << (uLevel - 1));
+            uint32_t uIndexInLevel = (uint32_t)(ptData->sbtNodes[uLeftNode].ulOffset / uSizeOfParentLevel);
+            const uint32_t uParentNode = uLeftNode - uParentLevelBlockCount - uIndexInLevel;
+            pl__coalesce_nodes(ptData, uLevel - 1, uParentNode);
+        }
+        else
+        {
+            // find parent node
+            const uint32_t uParentNode = uLeftNode - 1;
+            pl__add_node_to_freelist(ptData, 0, uParentNode);
+        }
+        ptNode->ulUsedSize = UINT64_MAX; // ignored
+    }
+    else
+    {
+        pl__add_node_to_freelist(ptData, uLevel, uNode);
+    }
+
+}
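Within one buddy block the ranges form an implicit binary tree stored level by level, which is why pl__get_device_node and pl__coalesce_nodes can walk between parent and children with plain index arithmetic instead of pointers. A standalone sketch of that arithmetic; the helper name and the worked numbers are illustrative, not part of the extension:

    // level k of a block occupies relative indices (1 << k) - 1 through (1 << (k + 1)) - 2,
    // so a parent at level k - 1 with in-level index j has children at level k:
    //     left  = parent + (1 << (k - 1)) + j
    //     right = left + 1
    // e.g. the 64 MiB node at offset 64 MiB (level 2, j = 1) sits at relative index 4;
    // its 32 MiB children land at 4 + 4 + 1 = 9 and 10, inside level 3's range 7..14
    static uint32_t
    pl__left_child_index(uint32_t uParentNode, uint32_t uLevel, uint32_t uIndexInLevel)
    {
        return uParentNode + (1u << (uLevel - 1)) + uIndexInLevel;
    }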
+
+static plDeviceMemoryAllocation
+pl_allocate_dedicated(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+
+    plDeviceAllocationBlock tBlock = gptDevice->allocate_memory(ptData->ptDevice, ulSize, PL_MEMORY_GPU, uTypeFilter, pcName);
+
+    plDeviceMemoryAllocation tAllocation = {
+        .pHostMapped = NULL,
+        .uHandle     = tBlock.ulAddress,
+        .ulOffset    = 0,
+        .ulSize      = ulSize,
+        .ptAllocator = ptData->ptAllocator,
+        .tMemoryMode = PL_MEMORY_GPU
+    };
+
+    uint32_t uBlockIndex = pl_sb_size(ptData->sbtBlocks);
+    if(pl_sb_size(ptData->sbtFreeBlockIndices) > 0)
+        uBlockIndex = pl_sb_pop(ptData->sbtFreeBlockIndices);
+    else
+        pl_sb_add(ptData->sbtBlocks);
+
+    plDeviceAllocationRange tRange = {
+        .ulOffset     = 0,
+        .ulTotalSize  = ulSize,
+        .ulUsedSize   = ulSize,
+        .ulBlockIndex = uBlockIndex
+    };
+    pl_sprintf(tRange.acName, "%s", pcName);
+
+    pl_sb_push(ptData->sbtNodes, tRange);
+    ptData->sbtBlocks[uBlockIndex] = tBlock;
+    return tAllocation;
+}
+
+static void
+pl_free_dedicated(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+
+    uint32_t uBlockIndex = 0;
+    uint32_t uNodeIndex = 0;
+    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
+    {
+        plDeviceAllocationRange* ptNode = &ptData->sbtNodes[i];
+        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex];
+
+        if(ptBlock->ulAddress == ptAllocation->uHandle)
+        {
+            uNodeIndex = i;
+            uBlockIndex = (uint32_t)ptNode->ulBlockIndex;
+            ptBlock->ulSize = 0;
+            break;
+        }
+    }
+    pl_sb_del_swap(ptData->sbtNodes, uNodeIndex);
+    pl_sb_push(ptData->sbtFreeBlockIndices, uBlockIndex);
+
+    gptDevice->free_memory(ptData->ptDevice, &ptData->sbtBlocks[uBlockIndex]);
+
+    ptAllocation->pHostMapped = NULL;
+    ptAllocation->uHandle = 0;
+    ptAllocation->ulOffset = 0;
+    ptAllocation->ulSize = 0;
+}
+
+static inline uint32_t
+pl__get_buddy_level(uint64_t ulSize)
+{
+    uint32_t uLevel = 0;
+    for(uint32_t i = 0; i < PL_DEVICE_LOCAL_LEVELS; i++)
+    {
+        const uint64_t uLevelSize = PL_DEVICE_BUDDY_BLOCK_SIZE / (1 << i);
+        if(uLevelSize <= ulSize)
+        {
+            break;
+        }
+        uLevel = i;
+    }
+    return uLevel;
+}
+
+static plDeviceMemoryAllocation
+pl_allocate_buddy(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+
+    if(ulAlignment > 0)
+        ulSize = ulSize + (ulAlignment - 1);
+
+    const uint32_t uLevel = pl__get_buddy_level(ulSize);
+    const uint32_t uNode = pl__get_device_node(ptInst, uLevel, 0);
+    PL_ASSERT(uNode != UINT32_MAX);
+
+    plDeviceAllocationRange* ptNode = &ptData->sbtNodes[uNode];
+    strncpy(ptNode->acName, pcName, PL_MAX_NAME_LENGTH);
+    ptNode->ulUsedSize = ulSize;
+
+    const uint32_t uBlockCount = pl_sb_size(ptData->sbtBlocks);
+    plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex];
+
+    plDeviceMemoryAllocation tAllocation = {
+        .pHostMapped = NULL,
+        .uHandle     = (uint64_t)ptBlock->ulAddress,
+        .ulOffset    = ptNode->ulOffset,
+        .ulSize      = ulSize,
+        .ptAllocator = ptData->ptAllocator,
+        .tMemoryMode = PL_MEMORY_GPU
+    };
+
+    if(ulAlignment > 0)
+        tAllocation.ulOffset = (((tAllocation.ulOffset) + ((ulAlignment)-1)) & ~((ulAlignment)-1));
+
+    if(tAllocation.uHandle == 0)
+    {
+        ptBlock->ulAddress = gptDevice->allocate_memory(ptData->ptDevice, PL_DEVICE_BUDDY_BLOCK_SIZE, PL_MEMORY_GPU, uTypeFilter, "Buddy Heap").ulAddress;
+        tAllocation.uHandle = (uint64_t)ptBlock->ulAddress;
+    }
+
+    return tAllocation;
+}
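Note how pl_allocate_buddy handles alignment: rather than searching for an already-aligned node, it pads the request by ulAlignment - 1 bytes, picks a level large enough for the padded size, then rounds the node's offset up. The rounded range therefore always stays inside the reserved node. A worked example with assumed numbers:

    // request 307200 bytes with 256-byte alignment
    // padded size = 307200 + 255 = 307455, so a node big enough for 307455 is taken
    // an unaligned node offset of 524300 rounds up to
    //     (524300 + 255) & ~255 = 524544
    // and 524544 + 307200 <= 524300 + 307455, so the aligned range still fits
    uint64_t ulAligned = (ulOffset + (ulAlignment - 1)) & ~(ulAlignment - 1);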
+
+static void
+pl_free_buddy(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+
+    // find associated node
+    uint32_t uNodeIndex = 0;
+    plDeviceAllocationRange* ptNode = NULL;
+    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
+    {
+        plDeviceAllocationRange* ptIntermediateNode = &ptData->sbtNodes[i];
+        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptIntermediateNode->ulBlockIndex];
+
+        if(ptBlock->ulAddress == ptAllocation->uHandle && ptIntermediateNode->ulOffset == ptAllocation->ulOffset && ptIntermediateNode->ulUsedSize == ptAllocation->ulSize)
+        {
+            ptNode = &ptData->sbtNodes[i];
+            uNodeIndex = (uint32_t)i;
+            break;
+        }
+    }
+
+    // find what level we need
+    uint32_t uLevel = 0;
+    for(; uLevel < PL_DEVICE_LOCAL_LEVELS; uLevel++)
+    {
+        const uint64_t uLevelSize = PL_DEVICE_BUDDY_BLOCK_SIZE / (1 << uLevel);
+        if(uLevelSize == ptNode->ulTotalSize)
+        {
+            break;
+        }
+    }
+    uLevel = pl_minu(uLevel, PL_DEVICE_LOCAL_LEVELS - 1);
+    pl__coalesce_nodes(ptData, uLevel, uNodeIndex);
+    strncpy(ptNode->acName, "not used", PL_MAX_NAME_LENGTH);
+}
+
+static plDeviceMemoryAllocation
+pl_allocate_staging_uncached(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+
+    plDeviceMemoryAllocation tAllocation = {
+        .pHostMapped = NULL,
+        .uHandle     = 0,
+        .ulOffset    = 0,
+        .ulSize      = ulSize,
+        .ptAllocator = ptData->ptAllocator,
+        .tMemoryMode = PL_MEMORY_GPU_CPU
+    };
+
+    // check for existing block
+    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
+    {
+        plDeviceAllocationRange* ptNode = &ptData->sbtNodes[i];
+        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex];
+        if(ptNode->ulUsedSize == 0 && ptNode->ulTotalSize >= ulSize && ptBlock->ulAddress != 0)
+        {
+            ptNode->ulUsedSize = ulSize;
+            pl_sprintf(ptNode->acName, "%s", pcName);
+            tAllocation.pHostMapped = ptBlock->pHostMapped;
+            tAllocation.uHandle = ptBlock->ulAddress;
+            tAllocation.ulOffset = 0;
+            tAllocation.ulSize = ptBlock->ulSize;
+            return tAllocation;
+        }
+    }
+
+    uint32_t uIndex = UINT32_MAX;
+    if(pl_sb_size(ptData->sbtFreeBlockIndices) > 0)
+    {
+        uIndex = pl_sb_pop(ptData->sbtFreeBlockIndices);
+    }
+    else
+    {
+        uIndex = pl_sb_size(ptData->sbtBlocks);
+        pl_sb_add(ptData->sbtNodes);
+        pl_sb_add(ptData->sbtBlocks);
+    }
+
+    plDeviceAllocationRange tRange = {
+        .ulOffset     = 0,
+        .ulUsedSize   = ulSize,
+        .ulTotalSize  = ulSize,
+        .ulBlockIndex = uIndex
+    };
+    pl_sprintf(tRange.acName, "%s", pcName);
+
+    plDeviceAllocationBlock tBlock = gptDevice->allocate_memory(ptData->ptDevice, ulSize, PL_MEMORY_GPU_CPU, uTypeFilter, "Uncached Heap");
+    tAllocation.uHandle = tBlock.ulAddress;
+    ptData->sbtNodes[uIndex] = tRange;
+    ptData->sbtBlocks[uIndex] = tBlock;
+    tAllocation.pHostMapped = tBlock.pHostMapped;
+    return tAllocation;
+}
+
+static void
+pl_free_staging_uncached(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
+{
+    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+
+    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtBlocks); i++)
+    {
+        plDeviceAllocationRange* ptRange = &ptData->sbtNodes[i];
+        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptRange->ulBlockIndex];
+
+        // find block
+        if(ptBlock->ulAddress == ptAllocation->uHandle)
+        {
+            ptRange->ulUsedSize = 0;
+            memset(ptRange->acName, 0, PL_MAX_NAME_LENGTH);
+            strncpy(ptRange->acName, "not used", PL_MAX_NAME_LENGTH);
+            break;
+        }
+    }
+}
+
+static plDeviceMemoryAllocatorI*
+pl_create_local_dedicated_allocator(plDevice* ptDevice)
+{
+    static plDeviceAllocatorData tAllocatorData = {0};
+    static plDeviceMemoryAllocatorI tAllocator = {0};
+    tAllocatorData.ptDevice = ptDevice;
+    tAllocatorData.ptAllocator = &tAllocator;
+    tAllocator.allocate = pl_allocate_dedicated;
+    tAllocator.free = pl_free_dedicated;
+    tAllocator.blocks = pl_get_allocator_blocks;
+    tAllocator.ranges = pl_get_allocator_ranges;
+    tAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tAllocatorData;
+    return &tAllocator;
+}
+
+static plDeviceMemoryAllocatorI*
+pl_create_local_buddy_allocator(plDevice* ptDevice)
+{
+    static plDeviceAllocatorData tAllocatorData = {0};
+    static plDeviceMemoryAllocatorI tAllocator = {0};
+
+    // first run
+    if(tAllocatorData.auFreeList[0] == 0)
+    {
+        for(uint32_t i = 0; i < PL_DEVICE_LOCAL_LEVELS; i++)
+            tAllocatorData.auFreeList[i] = UINT32_MAX;
+    }
+    tAllocatorData.ptDevice = ptDevice;
+    tAllocatorData.ptAllocator = &tAllocator;
+    tAllocator.allocate = pl_allocate_buddy;
+    tAllocator.free = pl_free_buddy;
+    tAllocator.blocks = pl_get_allocator_blocks;
+    tAllocator.ranges = pl_get_allocator_ranges;
+    tAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tAllocatorData;
+    return &tAllocator;
+}
+
+static plDeviceMemoryAllocatorI*
+pl_create_staging_uncached_allocator(plDevice* ptDevice)
+{
+    static plDeviceAllocatorData tAllocatorData = {0};
+    static plDeviceMemoryAllocatorI tAllocator = {0};
+    tAllocatorData.ptDevice = ptDevice;
+    tAllocatorData.ptAllocator = &tAllocator;
+    tAllocator.allocate = pl_allocate_staging_uncached;
+    tAllocator.free = pl_free_staging_uncached;
+    tAllocator.blocks = pl_get_allocator_blocks;
+    tAllocator.ranges = pl_get_allocator_ranges;
+    tAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tAllocatorData;
+    return &tAllocator;
+}
+
+static void
+pl_cleanup_allocators(plDevice* ptDevice)
+{
+    plDeviceMemoryAllocatorI* ptAllocator = pl_create_local_buddy_allocator(ptDevice);
+    plDeviceAllocatorData* ptAllocatorData = (plDeviceAllocatorData*)ptAllocator->ptInst;
+
+    for(uint32_t i = 0; i < pl_sb_size(ptAllocatorData->sbtBlocks); i++)
+    {
+        if(ptAllocatorData->sbtBlocks[i].ulAddress)
+            gptDevice->free_memory(ptDevice, &ptAllocatorData->sbtBlocks[i]);
+    }
+    pl_sb_free(ptAllocatorData->sbtBlocks);
+    pl_sb_free(ptAllocatorData->sbtNodes);
+    pl_sb_free(ptAllocatorData->sbtFreeBlockIndices);
+
+    ptAllocator = pl_create_local_dedicated_allocator(ptDevice);
+    ptAllocatorData = (plDeviceAllocatorData*)ptAllocator->ptInst;
+    for(uint32_t i = 0; i < pl_sb_size(ptAllocatorData->sbtBlocks); i++)
+    {
+        if(ptAllocatorData->sbtBlocks[i].ulAddress)
+            gptDevice->free_memory(ptDevice, &ptAllocatorData->sbtBlocks[i]);
+    }
+    pl_sb_free(ptAllocatorData->sbtBlocks);
+    pl_sb_free(ptAllocatorData->sbtNodes);
+    pl_sb_free(ptAllocatorData->sbtFreeBlockIndices);
+
+    ptAllocator = pl_create_staging_uncached_allocator(ptDevice);
+    ptAllocatorData = (plDeviceAllocatorData*)ptAllocator->ptInst;
+    for(uint32_t i = 0; i < pl_sb_size(ptAllocatorData->sbtBlocks); i++)
+    {
+        if(ptAllocatorData->sbtBlocks[i].ulAddress)
+            gptDevice->free_memory(ptDevice, &ptAllocatorData->sbtBlocks[i]);
+    }
+    pl_sb_free(ptAllocatorData->sbtBlocks);
+    pl_sb_free(ptAllocatorData->sbtNodes);
+    pl_sb_free(ptAllocatorData->sbtFreeBlockIndices);
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] public api implementation
+//-----------------------------------------------------------------------------
+
+const plGPUAllocatorsI*
+pl_load_gpu_allocators_api(void)
+{
+    static const plGPUAllocatorsI tApi = {
+        .create_local_dedicated_allocator = pl_create_local_dedicated_allocator,
+        .create_local_buddy_allocator = pl_create_local_buddy_allocator,
+        .create_staging_uncached_allocator = pl_create_staging_uncached_allocator,
+        .cleanup_allocators = pl_cleanup_allocators
+    };
+    return &tApi;
+}
+
+//-----------------------------------------------------------------------------
+// [SECTION] extension loading
+//----------------------------------------------------------------------------- + +PL_EXPORT void +pl_load_ext(plApiRegistryI* ptApiRegistry, bool bReload) +{ + const plDataRegistryI* ptDataRegistry = ptApiRegistry->first(PL_API_DATA_REGISTRY); + pl_set_memory_context(ptDataRegistry->get_data(PL_CONTEXT_MEMORY)); + + gptDevice = ptApiRegistry->first(PL_API_DEVICE); + gptGfx = ptApiRegistry->first(PL_API_GRAPHICS); + + if(bReload) + ptApiRegistry->replace(ptApiRegistry->first(PL_API_GPU_ALLOCATORS), pl_load_gpu_allocators_api()); + else + ptApiRegistry->add(PL_API_GPU_ALLOCATORS, pl_load_gpu_allocators_api()); +} + +PL_EXPORT void +pl_unload_ext(plApiRegistryI* ptApiRegistry) +{ + +} \ No newline at end of file diff --git a/extensions/pl_gpu_allocators_ext.h b/extensions/pl_gpu_allocators_ext.h new file mode 100644 index 00000000..4178fd75 --- /dev/null +++ b/extensions/pl_gpu_allocators_ext.h @@ -0,0 +1,70 @@ +/* + pl_gpu_allocators_ext.h +*/ + +/* +Index of this file: +// [SECTION] header mess +// [SECTION] defines +// [SECTION] forward declarations +// [SECTION] APIs +// [SECTION] public api +// [SECTION] public api structs +*/ + +//----------------------------------------------------------------------------- +// [SECTION] header mess +//----------------------------------------------------------------------------- + +#ifndef PL_GPU_ALLOCATORS_EXT_H +#define PL_GPU_ALLOCATORS_EXT_H + +#define PL_GPU_ALLOCATORS_EXT_VERSION "0.9.0" +#define PL_GPU_ALLOCATORS_EXT_VERSION_NUM 000900 + +//----------------------------------------------------------------------------- +// [SECTION] defines +//----------------------------------------------------------------------------- + +#ifndef PL_DEVICE_BUDDY_BLOCK_SIZE + #define PL_DEVICE_BUDDY_BLOCK_SIZE 268435456 +#endif + +#ifndef PL_DEVICE_LOCAL_LEVELS + #define PL_DEVICE_LOCAL_LEVELS 8 +#endif + +//----------------------------------------------------------------------------- +// [SECTION] forward declarations +//----------------------------------------------------------------------------- + +typedef struct _plDeviceMemoryAllocatorI plDeviceMemoryAllocatorI; +typedef struct _plDevice plDevice; + +//----------------------------------------------------------------------------- +// [SECTION] APIs +//----------------------------------------------------------------------------- + +#define PL_API_GPU_ALLOCATORS "PL_API_GPU_ALLOCATORS" +typedef struct _plGPUAllocatorsI plGPUAllocatorsI; + +//----------------------------------------------------------------------------- +// [SECTION] public api +//----------------------------------------------------------------------------- + +const plGPUAllocatorsI* pl_load_gpu_allocators_api(void); + +//----------------------------------------------------------------------------- +// [SECTION] public api structs +//----------------------------------------------------------------------------- + +typedef struct _plGPUAllocatorsI +{ + plDeviceMemoryAllocatorI* (*create_local_dedicated_allocator) (plDevice* ptDevice); + plDeviceMemoryAllocatorI* (*create_local_buddy_allocator) (plDevice* ptDevice); + plDeviceMemoryAllocatorI* (*create_staging_uncached_allocator)(plDevice* ptDevice); + + void (*cleanup_allocators)(plDevice* ptDevice); +} plGPUAllocatorsI; + +#endif // PL_GPU_ALLOCATORS_EXT_H \ No newline at end of file diff --git a/extensions/pl_graphics_ext.c b/extensions/pl_graphics_ext.c index 738a29de..5db420e5 100644 --- a/extensions/pl_graphics_ext.c +++ b/extensions/pl_graphics_ext.c @@ -484,424 +484,6 @@ 
diff --git a/extensions/pl_graphics_ext.c b/extensions/pl_graphics_ext.c
index 738a29de..5db420e5 100644
--- a/extensions/pl_graphics_ext.c
+++ b/extensions/pl_graphics_ext.c
@@ -484,424 +484,6 @@ pl__add_3d_bezier_cubic(plDrawList3D* ptDrawlist, plVec3 tP0, plVec3 tP1, plVec3
     pl__add_3d_line(ptDrawlist, tVerticies[0], tVerticies[1], tColor, fThickness);
 }
 
-//-----------------------------------------------------------------------------
-// [SECTION] allocator stuff
-//-----------------------------------------------------------------------------
-
-typedef struct _plDeviceAllocatorData
-{
-    plDevice*                ptDevice;
-    plDeviceAllocationBlock* sbtBlocks;
-    uint32_t*                sbtFreeBlockIndices;
-
-    // buddy allocator data
-    plDeviceAllocationRange* sbtNodes;
-    uint32_t                 auFreeList[PL_DEVICE_LOCAL_LEVELS];
-} plDeviceAllocatorData;
-
-static plDeviceAllocationBlock*
-pl_get_allocator_blocks(struct plDeviceMemoryAllocatorO* ptInst, uint32_t* puSizeOut)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-
-    if(puSizeOut)
-    {
-        *puSizeOut = pl_sb_size(ptData->sbtBlocks);
-    }
-    return ptData->sbtBlocks;
-}
-
-static plDeviceAllocationRange*
-pl_get_allocator_ranges(struct plDeviceMemoryAllocatorO* ptInst, uint32_t* puSizeOut)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-
-    if(puSizeOut)
-    {
-        *puSizeOut = pl_sb_size(ptData->sbtNodes);
-    }
-    return ptData->sbtNodes;
-}
-
-static void
-pl__add_node_to_freelist(plDeviceAllocatorData* ptData, uint32_t uLevel, uint32_t uNode)
-{
-    plDeviceAllocationRange* ptNode = &ptData->sbtNodes[uNode];
-    ptNode->ulUsedSize = 0;
-    ptData->sbtNodes[uNode].uNextNode = ptData->auFreeList[uLevel];
-    ptData->auFreeList[uLevel] = uNode;
-}
-
-static void
-pl__remove_node_from_freelist(plDeviceAllocatorData* ptData, uint32_t uLevel, uint32_t uNode)
-{
-
-    bool bFound = false;
-    if(ptData->auFreeList[uLevel] == uNode)
-    {
-        ptData->auFreeList[uLevel] = ptData->sbtNodes[uNode].uNextNode;
-        bFound = true;
-    }
-    else
-    {
-        uint32_t uNextNode = ptData->auFreeList[uLevel];
-        while(uNextNode != UINT32_MAX)
-        {
-            uint32_t uPrevNode = uNextNode;
-            uNextNode = ptData->sbtNodes[uPrevNode].uNextNode;
-
-            if(uNextNode == uNode)
-            {
-                ptData->sbtNodes[uPrevNode].uNextNode = ptData->sbtNodes[uNode].uNextNode;
-                bFound = true;
-                break;
-            }
-        }
-    }
-
-    plDeviceAllocationRange* ptNode = &ptData->sbtNodes[uNode];
-    ptNode->ulUsedSize = UINT64_MAX; // ignored
-    ptNode->uNextNode = UINT32_MAX;
-    PL_ASSERT(bFound && "could not find node to remove");
-}
-
-static uint32_t
-pl__create_device_node(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uMemoryType)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-    uint32_t uNode = UINT32_MAX;
-
-    plDeviceAllocationBlock tBlock = {
-        .ulAddress    = 0,
-        .ulSize       = PL_DEVICE_BUDDY_BLOCK_SIZE,
-        .ulMemoryType = uMemoryType
-    };
-
-    uNode = pl_sb_size(ptData->sbtNodes);
-    uint32_t uNodeIndex = uNode;
-    pl_sb_resize(ptData->sbtNodes, pl_sb_size(ptData->sbtNodes) + (1 << PL_DEVICE_LOCAL_LEVELS) - 1);
-    const uint32_t uBlockIndex = pl_sb_size(ptData->sbtBlocks);
-    for(uint32_t uLevelIndex = 0; uLevelIndex < PL_DEVICE_LOCAL_LEVELS; uLevelIndex++)
-    {
-        const uint64_t uSizeOfLevel = PL_DEVICE_BUDDY_BLOCK_SIZE / ((uint64_t)1 << (uint64_t)uLevelIndex);
-        const uint32_t uLevelBlockCount = (1 << uLevelIndex);
-        uint64_t uCurrentOffset = 0;
-        for(uint32_t i = 0; i < uLevelBlockCount; i++)
-        {
-            ptData->sbtNodes[uNodeIndex].uNodeIndex = uNodeIndex;
-            ptData->sbtNodes[uNodeIndex].uNextNode = UINT32_MAX;
-            ptData->sbtNodes[uNodeIndex].ulOffset = uCurrentOffset;
-            ptData->sbtNodes[uNodeIndex].ulTotalSize = uSizeOfLevel;
-            ptData->sbtNodes[uNodeIndex].ulBlockIndex = uBlockIndex;
-            strncpy(ptData->sbtNodes[uNodeIndex].acName, "not used", PL_MAX_NAME_LENGTH);
-            uCurrentOffset += uSizeOfLevel;
-            uNodeIndex++;
-        }
-    }
-    pl_sb_push(ptData->sbtBlocks, tBlock);
-    return uNode;
-}
-
-static uint32_t
-pl__get_device_node(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uLevel, uint32_t uMemoryType)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-    uint32_t uNode = UINT32_MAX;
-
-    if(uLevel == 0)
-    {
-        if(ptData->auFreeList[0] == UINT32_MAX) // no nodes available
-        {
-            uNode = pl__create_device_node(ptInst, uMemoryType);
-            pl__add_node_to_freelist(ptData, 0, uNode);
-        }
-        else // nodes available
-        {
-            // look for block with correct memory type
-            uint32_t uNextNode = ptData->auFreeList[0];
-            while(uNextNode != UINT32_MAX)
-            {
-                if(ptData->sbtBlocks[ptData->sbtNodes[uNextNode].ulBlockIndex].ulMemoryType == (uint64_t)uMemoryType)
-                {
-                    uNode = uNextNode;
-                    break;
-                }
-                uNextNode = ptData->sbtNodes[uNextNode].uNextNode;
-            }
-
-            if(uNode == UINT32_MAX) // could not find block with correct memory type
-            {
-                uNode = pl__create_device_node(ptInst, uMemoryType);
-                pl__add_node_to_freelist(ptData, 0, uNode);
-            }
-        }
-    }
-    else if(ptData->auFreeList[uLevel] == UINT32_MAX) // no nodes available at the required level
-    {
-        // get bigger block and split it and return left block
-        uint32_t uParentNode = pl__get_device_node(ptInst, uLevel - 1, uMemoryType);
-        plDeviceAllocationRange* ptParentNode = &ptData->sbtNodes[uParentNode];
-        ptParentNode->ulUsedSize = UINT64_MAX; // ignore
-
-        const uint64_t uSizeOfLevel = PL_DEVICE_BUDDY_BLOCK_SIZE / ((uint64_t)1 << (uint64_t)(uLevel - 1));
-        const uint32_t uLevelBlockCount = (1 << (uLevel - 1));
-        uint32_t uIndexInLevel = (uint32_t)(ptParentNode->ulOffset / uSizeOfLevel);
-
-        const uint32_t uLeftIndex = uParentNode + uLevelBlockCount + uIndexInLevel;
-        const uint32_t uRightIndex = uParentNode + uLevelBlockCount + uIndexInLevel + 1;
-
-        pl__add_node_to_freelist(ptData, uLevel, uLeftIndex);
-        pl__add_node_to_freelist(ptData, uLevel, uRightIndex);
-
-        uNode = uLeftIndex;
-    }
-    else // nodes available at required level
-    {
-        // look for block with correct memory type
-        uint32_t uNextNode = ptData->auFreeList[uLevel];
-        while(uNextNode != UINT32_MAX)
-        {
-            const uint64_t ulBlockIndex = ptData->sbtNodes[uNextNode].ulBlockIndex;
-            if(ptData->sbtBlocks[ulBlockIndex].ulMemoryType == (uint64_t)uMemoryType)
-            {
-                uNode = uNextNode;
-                break;
-            }
-            uNextNode = ptData->sbtNodes[uNextNode].uNextNode;
-        }
-
-        if(uNode == UINT32_MAX) // could not find block with correct memory type
-        {
-            uint32_t uParentNode = pl__get_device_node(ptInst, uLevel - 1, uMemoryType);
-            plDeviceAllocationRange* ptParentNode = &ptData->sbtNodes[uParentNode];
-
-            const uint64_t uSizeOfLevel = PL_DEVICE_BUDDY_BLOCK_SIZE / ((uint64_t)1 << (uint64_t)(uLevel - 1));
-            const uint32_t uLevelBlockCount = (1 << (uLevel - 1));
-            uint32_t uIndexInLevel = (uint32_t)(ptParentNode->ulOffset / uSizeOfLevel);
-
-            const uint32_t uLeftIndex = uParentNode + uLevelBlockCount + uIndexInLevel;
-            const uint32_t uRightIndex = uParentNode + uLevelBlockCount + uIndexInLevel + 1;
-
-            pl__add_node_to_freelist(ptData, uLevel, uLeftIndex);
-            pl__add_node_to_freelist(ptData, uLevel, uRightIndex);
-            uNode = uLeftIndex;
-        }
-    }
-
-    pl__remove_node_from_freelist(ptData, uLevel, uNode);
-    return uNode;
-}
-
-static inline bool
-pl__is_node_free(plDeviceAllocatorData* ptData, uint32_t uNode)
-{
-
-    // find what level we need
-    uint32_t uLevel = 0;
-    for(; uLevel < PL_DEVICE_LOCAL_LEVELS; uLevel++)
-    {
-        const uint64_t uLevelSize = PL_DEVICE_BUDDY_BLOCK_SIZE / (1 << uLevel);
-        if(uLevelSize == ptData->sbtNodes[uNode].ulTotalSize)
-        {
-            break;
-        }
-    }
-    uLevel = pl_minu(uLevel, PL_DEVICE_LOCAL_LEVELS - 1);
-
-    // check if node is in freelist
-    bool bInFreeList = false;
-    uint32_t uNextNode = ptData->auFreeList[uLevel];
-    while(uNextNode != UINT32_MAX)
-    {
-
-        if(uNextNode == ptData->sbtNodes[uNextNode].uNextNode)
-            break;
-
-        if(uNextNode == uNode)
-        {
-            bInFreeList = true;
-            break;
-        }
-        uNextNode = ptData->sbtNodes[uNextNode].uNextNode;
-    }
-
-    const bool bFree = ptData->sbtNodes[uNode].ulUsedSize == 0;
-    if(bFree)
-    {
-        PL_ASSERT(bInFreeList && "free item was not in list");
-    }
-    return bFree;
-}
-
-static void
-pl__coalesce_nodes(plDeviceAllocatorData* ptData, uint32_t uLevel, uint32_t uNode)
-{
-    plDeviceAllocationRange* ptNode = &ptData->sbtNodes[uNode];
-
-    // just return node to freelist
-    if(uLevel == 0)
-    {
-        pl__add_node_to_freelist(ptData, uLevel, uNode);
-        return;
-    }
-
-    bool bBothFree = false;
-    uint32_t uLeftNode = uNode;
-    uint32_t uRightNode = uNode + 1;
-
-    if(ptNode->ulBlockIndex % 2 == 0)
-    {
-        if(uNode % 2 == 1) // left node
-        {
-            if(pl__is_node_free(ptData, uRightNode))
-            {
-
-                bBothFree = true;
-                pl__remove_node_from_freelist(ptData, uLevel, uRightNode);
-            }
-        }
-        else
-        {
-            uLeftNode = uNode - 1;
-            uRightNode = uNode;
-            if(pl__is_node_free(ptData, uLeftNode))
-            {
-                bBothFree = true;
-                pl__remove_node_from_freelist(ptData, uLevel, uLeftNode);
-            }
-        }
-    }
-    else
-    {
-        if(uNode % 2 == 1) // right node
-        {
-            if(pl__is_node_free(ptData, uLeftNode))
-            {
-                bBothFree = true;
-                pl__remove_node_from_freelist(ptData, uLevel, uLeftNode);
-            }
-        }
-        else
-        {
-            if(pl__is_node_free(ptData, uRightNode))
-            {
-                bBothFree = true;
-                pl__remove_node_from_freelist(ptData, uLevel, uRightNode);
-            }
-        }
-    }
-
-    if(bBothFree) // need to coalese
-    {
-
-        if(uLevel > 1)
-        {
-            // find parent node
-            const uint64_t uSizeOfParentLevel = PL_DEVICE_BUDDY_BLOCK_SIZE / ((uint64_t)1 << (uint64_t)(uLevel - 1));
-            const uint32_t uParentLevelBlockCount = (1 << (uLevel - 1));
-            uint32_t uIndexInLevel = (uint32_t)(ptData->sbtNodes[uLeftNode].ulOffset / uSizeOfParentLevel);
-            const uint32_t uParentNode = uLeftNode - uParentLevelBlockCount - uIndexInLevel;
-            pl__coalesce_nodes(ptData, uLevel - 1, uParentNode);
-        }
-        else
-        {
-            // find parent node
-            const uint32_t uParentNode = uLeftNode - 1;
-            pl__add_node_to_freelist(ptData, 0, uParentNode);
-        }
-        ptNode->ulUsedSize = UINT64_MAX; // ignored
-    }
-    else
-    {
-        pl__add_node_to_freelist(ptData, uLevel, uNode);
-    }
-
-}
-
-static inline uint32_t
-pl__get_buddy_level(uint64_t ulSize)
-{
-    uint32_t uLevel = 0;
-    for(uint32_t i = 0; i < PL_DEVICE_LOCAL_LEVELS; i++)
-    {
-        const uint64_t uLevelSize = PL_DEVICE_BUDDY_BLOCK_SIZE / (1 << i);
-        if(uLevelSize <= ulSize)
-        {
-            break;
-        }
-        uLevel = i;
-    }
-    return uLevel;
-}
-
-static plDeviceMemoryAllocation
-pl__allocate_buddy(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName, uint32_t uMemoryType)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-
-    if(ulAlignment > 0)
-        ulSize = ulSize + (ulAlignment - 1);
-
-    const uint32_t uLevel = pl__get_buddy_level(ulSize);
-    const uint32_t uNode = pl__get_device_node(ptInst, uLevel, uMemoryType);
-    PL_ASSERT(uNode != UINT32_MAX);
-
-    plDeviceAllocationRange* ptNode = &ptData->sbtNodes[uNode];
-    strncpy(ptNode->acName, pcName, PL_MAX_NAME_LENGTH);
-    ptNode->ulUsedSize = ulSize;
-
-    const uint32_t uBlockCount = pl_sb_size(ptData->sbtBlocks);
-    plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex];
-
-    plDeviceMemoryAllocation tAllocation = {
-        .pHostMapped = NULL,
-        .uHandle     = (uint64_t)ptBlock->ulAddress,
-        .ulOffset    = ptNode->ulOffset,
-        .ulSize      = ulSize,
-        .ptInst      = ptInst,
-    };
-
-    if(ulAlignment > 0)
-        tAllocation.ulOffset = (((tAllocation.ulOffset) + ((ulAlignment)-1)) & ~((ulAlignment)-1));
-    return tAllocation;
-}
-
-static void
-pl_free_buddy(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-
-    // find associated node
-    uint32_t uNodeIndex = 0;
-    plDeviceAllocationRange* ptNode = NULL;
-    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
-    {
-        plDeviceAllocationRange* ptIntermediateNode = &ptData->sbtNodes[i];
-        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptIntermediateNode->ulBlockIndex];
-
-        if(ptBlock->ulAddress == ptAllocation->uHandle && ptIntermediateNode->ulOffset == ptAllocation->ulOffset && ptIntermediateNode->ulUsedSize == ptAllocation->ulSize)
-        {
-            ptNode = &ptData->sbtNodes[i];
-            uNodeIndex = (uint32_t)i;
-            break;
-        }
-    }
-
-    // find what level we need
-    uint32_t uLevel = 0;
-    for(; uLevel < PL_DEVICE_LOCAL_LEVELS; uLevel++)
-    {
-        const uint64_t uLevelSize = PL_DEVICE_BUDDY_BLOCK_SIZE / (1 << uLevel);
-        if(uLevelSize == ptNode->ulTotalSize)
-        {
-            break;
-        }
-    }
-    uLevel = pl_minu(uLevel, PL_DEVICE_LOCAL_LEVELS - 1);
-    pl__coalesce_nodes(ptData, uLevel, uNodeIndex);
-    strncpy(ptNode->acName, "not used", PL_MAX_NAME_LENGTH);
-}
-
 //-----------------------------------------------------------------------------
 // [SECTION] enums
 //-----------------------------------------------------------------------------
@@ -1112,25 +694,6 @@ pl__cleanup_common_graphics(plGraphics* ptGraphics)
         pl_sb_free(ptGarbage->sbtBindGroups);
     }
 
-    plDeviceAllocatorData* ptData0 = (plDeviceAllocatorData*)ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst;
-    plDeviceAllocatorData* ptData1 = (plDeviceAllocatorData*)ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst;
-    plDeviceAllocatorData* ptData2 = (plDeviceAllocatorData*)ptGraphics->tDevice.tLocalBuddyAllocator.ptInst;
-    plDeviceAllocatorData* ptData3 = (plDeviceAllocatorData*)ptGraphics->tDevice.tStagingCachedAllocator.ptInst;
-    pl_sb_free(ptData0->sbtBlocks);
-    pl_sb_free(ptData1->sbtBlocks);
-    pl_sb_free(ptData2->sbtBlocks);
-    pl_sb_free(ptData3->sbtBlocks);
-
-    pl_sb_free(ptData0->sbtNodes);
-    pl_sb_free(ptData1->sbtNodes);
-    pl_sb_free(ptData2->sbtNodes);
-    pl_sb_free(ptData3->sbtNodes);
-
-    pl_sb_free(ptData0->sbtFreeBlockIndices);
-    pl_sb_free(ptData1->sbtFreeBlockIndices);
-    pl_sb_free(ptData2->sbtFreeBlockIndices);
-    pl_sb_free(ptData3->sbtFreeBlockIndices);
-
     pl_sb_free(ptGraphics->sbtGarbage);
     pl_sb_free(ptGraphics->tSwapchain.sbtSwapchainTextureViews);
     pl_sb_free(ptGraphics->sbtShadersCold);
@@ -1159,4 +722,4 @@ pl__cleanup_common_graphics(plGraphics* ptGraphics)
     PL_FREE(ptGraphics->_pInternalData);
     PL_FREE(ptGraphics->tDevice._pInternalData);
     PL_FREE(ptGraphics->tSwapchain._pInternalData);
-}
\ No newline at end of file
+}
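Since pl__cleanup_common_graphics no longer frees the allocator pools, their lifetime is now owned by the extension, and shutdown order matters: applications are expected to run cleanup_allocators on the device before tearing the graphics context down. A sketch of the assumed teardown order (the graphics-side call is named illustratively; the actual entry point depends on the application):

    gptGpuAllocators->cleanup_allocators(&ptGraphics->tDevice); // returns blocks to the device, frees bookkeeping arrays
    gptGfx->cleanup(ptGraphics);                                // hypothetical graphics teardown call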
diff --git a/extensions/pl_graphics_ext.h b/extensions/pl_graphics_ext.h
index ca56bf28..da2fe484 100644
--- a/extensions/pl_graphics_ext.h
+++ b/extensions/pl_graphics_ext.h
@@ -20,12 +20,8 @@ Index of this file:
 #ifndef PL_GRAPHICS_EXT_H
 #define PL_GRAPHICS_EXT_H
 
-#define PL_GRAPHICS_EXT_VERSION "0.9.0"
-#define PL_GRAPHICS_EXT_VERSION_NUM 000900
-
-#ifndef PL_DEVICE_BUDDY_BLOCK_SIZE
-    #define PL_DEVICE_BUDDY_BLOCK_SIZE 268435456
-#endif
+#define PL_GRAPHICS_EXT_VERSION "0.10.0"
+#define PL_GRAPHICS_EXT_VERSION_NUM 001000
 
 #ifndef PL_DEVICE_ALLOCATION_BLOCK_SIZE
     #define PL_DEVICE_ALLOCATION_BLOCK_SIZE 134217728
@@ -35,10 +31,6 @@ Index of this file:
     #define PL_MAX_DYNAMIC_DATA_SIZE 512
 #endif
 
-#ifndef PL_DEVICE_LOCAL_LEVELS
-    #define PL_DEVICE_LOCAL_LEVELS 8
-#endif
-
 #ifndef PL_MAX_BUFFERS_PER_BIND_GROUP
     #define PL_MAX_BUFFERS_PER_BIND_GROUP 32
 #endif
@@ -175,10 +167,11 @@ PL_DEFINE_HANDLE(plRenderPassLayoutHandle);
 PL_DEFINE_HANDLE(plSemaphoreHandle);
 
 // device memory
-typedef struct _plDeviceAllocationRange  plDeviceAllocationRange;
-typedef struct _plDeviceAllocationBlock  plDeviceAllocationBlock;
-typedef struct _plDeviceMemoryAllocation plDeviceMemoryAllocation;
-typedef struct _plDeviceMemoryAllocatorI plDeviceMemoryAllocatorI;
+typedef struct _plDeviceMemoryRequirements plDeviceMemoryRequirements;
+typedef struct _plDeviceAllocationRange    plDeviceAllocationRange;
+typedef struct _plDeviceAllocationBlock    plDeviceAllocationBlock;
+typedef struct _plDeviceMemoryAllocation   plDeviceMemoryAllocation;
+typedef struct _plDeviceMemoryAllocatorI   plDeviceMemoryAllocatorI;
 
 // 3D drawing api
 typedef struct _plDrawList3D plDrawList3D;
@@ -226,8 +219,10 @@ typedef struct _plDrawStreamI
 
 typedef struct _plDeviceI
 {
+    // buffers
     plBufferHandle (*create_buffer)            (plDevice* ptDevice, const plBufferDescription* ptDesc, const char* pcName);
+    void           (*bind_buffer_to_memory)    (plDevice* ptDevice, plBufferHandle tHandle, const plDeviceMemoryAllocation* ptAllocation);
     void           (*queue_buffer_for_deletion)(plDevice* ptDevice, plBufferHandle tHandle);
     void           (*destroy_buffer)           (plDevice* ptDevice, plBufferHandle tHandle);
     plBuffer*      (*get_buffer)               (plDevice* ptDevice, plBufferHandle ptHandle); // do not store
@@ -240,6 +235,7 @@ typedef struct _plDeviceI
     // textures (if manually handling mips/levels, don't use initial data, use "copy_buffer_to_texture" instead)
     plTextureHandle (*create_texture)            (plDevice* ptDevice, const plTextureDesc* ptDesc, const char* pcName);
     plTextureHandle (*create_texture_view)       (plDevice* ptDevice, const plTextureViewDesc* ptDesc, const char* pcName);
+    void            (*bind_texture_to_memory)    (plDevice* ptDevice, plTextureHandle tHandle, const plDeviceMemoryAllocation* ptAllocation);
     void            (*queue_texture_for_deletion)(plDevice* ptDevice, plTextureHandle tHandle);
     void            (*destroy_texture)           (plDevice* ptDevice, plTextureHandle tHandle);
     plTexture*      (*get_texture)               (plDevice* ptDevice, plTextureHandle ptHandle); // do not store
@@ -251,7 +247,7 @@ typedef struct _plDeviceI
     void            (*queue_bind_group_for_deletion)(plDevice* ptDevice, plBindGroupHandle tHandle);
     void            (*destroy_bind_group)           (plDevice* ptDevice, plBindGroupHandle tHandle);
     plBindGroup*    (*get_bind_group)               (plDevice* ptDevice, plBindGroupHandle ptHandle); // do not store
-    plDynamicBinding (*allocate_dynamic_data)       (plDevice* ptDevice, size_t szSize);
+
 
     // render passes
     plRenderPassLayoutHandle (*create_render_pass_layout)(plDevice* ptDevice, const plRenderPassLayoutDescription* ptDesc);
@@ -273,6 +269,15 @@ typedef struct _plDeviceI
 
     // syncronization
     plSemaphoreHandle (*create_semaphore)(plDevice* ptDevice, bool bHostVisible);
+
+    // memory
+    plDynamicBinding        (*allocate_dynamic_data)(plDevice* ptDevice, size_t szSize);
+    plDeviceAllocationBlock (*allocate_memory)(plDevice* ptDevice, uint64_t ulSize, plMemoryMode tMemoryMode, uint32_t uTypeFilter, const char* pcName);
+    void                    (*free_memory)(plDevice* ptDevice, plDeviceAllocationBlock* ptBlock);
+
+    // misc
+    void (*flush_device)(plDevice* ptDevice);
+
 } plDeviceI;
 
 typedef struct _plGraphicsI
@@ -454,11 +459,12 @@ typedef struct _plDrawList3D
 
 typedef struct _plDeviceMemoryAllocation
 {
-    uint64_t uHandle;
-    uint64_t ulOffset;
-    uint64_t ulSize;
-    char*    pHostMapped;
-    struct plDeviceMemoryAllocatorO* ptInst;
+    plMemoryMode              tMemoryMode;
+    uint64_t                  uHandle;
+    uint64_t                  ulOffset;
+    uint64_t                  ulSize;
+    char*                     pHostMapped;
+    plDeviceMemoryAllocatorI* ptAllocator;
 } plDeviceMemoryAllocation;
 
 typedef struct _plDeviceAllocationRange
@@ -472,14 +478,23 @@ typedef struct _plDeviceAllocationRange
     uint32_t uNextNode;
 } plDeviceAllocationRange;
 
+typedef struct _plDeviceMemoryRequirements
+{
+    uint64_t ulSize;
+    uint64_t ulAlignment;
+    uint32_t uMemoryTypeBits;
+} plDeviceMemoryRequirements;
+
 typedef struct _plDeviceAllocationBlock
 {
-    uint64_t ulMemoryType;
-    uint64_t ulAddress;
-    uint64_t ulSize;
-    char*    pHostMapped;
-    uint32_t uCurrentIndex; // used but debug tool
-    double   dLastTimeUsed;
+    plMemoryMode              tMemoryMode;
+    uint64_t                  ulMemoryType;
+    uint64_t                  ulAddress;
+    uint64_t                  ulSize;
+    char*                     pHostMapped;
+    uint32_t                  uCurrentIndex; // used by debug tool
+    double                    dLastTimeUsed;
+    plDeviceMemoryAllocatorI* ptAllocator;
 } plDeviceAllocationBlock;
 
 typedef struct _plDeviceMemoryAllocatorI
@@ -489,6 +504,8 @@ typedef struct _plDeviceMemoryAllocatorI
 
     plDeviceMemoryAllocation (*allocate)(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName);
     void                     (*free)    (struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation);
+
+    // for debug views
     plDeviceAllocationBlock* (*blocks)  (struct plDeviceMemoryAllocatorO* ptInst, uint32_t* puSizeOut);
     plDeviceAllocationRange* (*ranges)  (struct plDeviceMemoryAllocatorO* ptInst, uint32_t* puSizeOut);
 } plDeviceMemoryAllocatorI;
@@ -521,34 +538,36 @@ typedef struct _plSampler
 
 typedef struct _plTextureDesc
 {
-    plVec3         tDimensions;
-    uint32_t       uLayers;
-    uint32_t       uMips;
-    plFormat       tFormat;
-    plTextureType  tType;
-    plTextureUsage tUsage;
-    plTextureUsage tInitialUsage;
+    char           acDebugName[PL_MAX_NAME_LENGTH];
+    plVec3         tDimensions;
+    uint32_t       uLayers;
+    uint32_t       uMips;
+    plFormat       tFormat;
+    plTextureType  tType;
+    plTextureUsage tUsage;
+    plTextureUsage tInitialUsage;
 } plTextureDesc;
 
 typedef struct _plTexture
 {
-    plTextureDesc            tDesc;
-    plTextureViewDesc        tView;
-    plDeviceMemoryAllocation tMemoryAllocation;
+    plTextureDesc              tDesc;
+    plTextureViewDesc          tView;
+    plDeviceMemoryRequirements tMemoryRequirements;
+    plDeviceMemoryAllocation   tMemoryAllocation;
 } plTexture;
 
 typedef struct _plBufferDescription
 {
     char          acDebugName[PL_MAX_NAME_LENGTH];
     plBufferUsage tUsage;
-    plMemoryMode  tMemory;
     uint32_t      uByteSize;
 } plBufferDescription;
 
 typedef struct _plBuffer
 {
-    plBufferDescription      tDescription;
-    plDeviceMemoryAllocation tMemoryAllocation;
+    plBufferDescription        tDescription;
+    plDeviceMemoryRequirements tMemoryRequirements;
+    plDeviceMemoryAllocation   tMemoryAllocation;
 } plBuffer;
 
 typedef struct _plBufferBinding
@@ -810,10 +829,7 @@ typedef struct _plRenderPass
 typedef struct _plDevice
 {
     plGraphics* ptGraphics;
-    plDeviceMemoryAllocatorI tLocalDedicatedAllocator;
-    plDeviceMemoryAllocatorI tLocalBuddyAllocator;
-    plDeviceMemoryAllocatorI tStagingUnCachedAllocator;
-    plDeviceMemoryAllocatorI tStagingCachedAllocator;
+    plDeviceMemoryAllocatorI* ptDynamicAllocator;
    void* _pInternalData;
 } plDevice;
 
@@ -994,7 +1010,8 @@ enum _plBufferUsage
     PL_BUFFER_USAGE_INDEX,
     PL_BUFFER_USAGE_VERTEX,
     PL_BUFFER_USAGE_UNIFORM,
-    PL_BUFFER_USAGE_STORAGE
+    PL_BUFFER_USAGE_STORAGE,
+    PL_BUFFER_USAGE_STAGING,
 };
 
 enum _plTextureUsage
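The interface split above means resources now come back from create_* unbound: the backend only fills tMemoryRequirements, the caller picks an allocator, and bind_*_to_memory attaches the result. A sketch of the intended sequence for a buffer, assuming gptDevice and an allocator ptAllocator are already in hand (descriptor contents and names are illustrative):

    plBufferHandle tHandle = gptDevice->create_buffer(ptDevice, &tDesc, "vertex buffer");
    plBuffer* ptBuffer = gptDevice->get_buffer(ptDevice, tHandle); // do not store

    const plDeviceMemoryRequirements* ptReqs = &ptBuffer->tMemoryRequirements;
    plDeviceMemoryAllocation tAllocation = ptAllocator->allocate(ptAllocator->ptInst,
        ptReqs->uMemoryTypeBits, ptReqs->ulSize, ptReqs->ulAlignment, "vertex buffer");

    gptDevice->bind_buffer_to_memory(ptDevice, tHandle, &tAllocation);

Textures follow the same pattern through create_texture, tMemoryRequirements, and bind_texture_to_memory.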
diff --git a/extensions/pl_metal_ext.m b/extensions/pl_metal_ext.m
index 5bf58601..c0c5870e 100644
--- a/extensions/pl_metal_ext.m
+++ b/extensions/pl_metal_ext.m
@@ -130,6 +130,7 @@ - (instancetype)initWithBuffer:(id)buffer
 {
     id tTexture;
     id tHeap;
+    MTLTextureDescriptor* ptTextureDescriptor;
 } plMetalTexture;
 
 typedef struct _plMetalSampler
@@ -208,8 +209,6 @@ - (instancetype)initWithBuffer:(id)buffer
 typedef struct _plDeviceMetal
 {
     id tDevice;
-    id* sbtBuddyHeaps;
-    id* sbtStagingHeaps;
     id* sbtDedicatedHeaps;
 } plDeviceMetal;
 
@@ -236,17 +235,9 @@ - (instancetype)initWithBuffer:(id)buffer
 static plTrackedMetalBuffer* pl__dequeue_reusable_buffer(plGraphics* ptGraphics, NSUInteger length);
 static plMetalPipelineEntry* pl__get_3d_pipelines(plGraphics* ptGraphics, pl3DDrawFlags tFlags, uint32_t uSampleCount, MTLRenderPassDescriptor* ptRenderPassDescriptor);
 
-// device memory allocators specifics
-static plDeviceMemoryAllocation pl_allocate_dedicated(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName);
-static void                     pl_free_dedicated    (struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation);
+static plDeviceAllocationBlock pl_allocate_memory(plDevice* ptDevice, uint64_t ulSize, plMemoryMode tMemoryMode, uint32_t uTypeFilter, const char* pcName);
+static void                    pl_free_memory(plDevice* ptDevice, plDeviceAllocationBlock* ptBlock);
 
-static plDeviceMemoryAllocation pl_allocate_staging_uncached(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName);
-static void                     pl_free_staging_uncached    (struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation);
-
-static plDeviceMemoryAllocation pl_allocate_buddy(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName);
-
-// device memory allocator general
-static plDeviceAllocationBlock* pl_get_allocator_blocks(struct plDeviceMemoryAllocatorO* ptInst, uint32_t* puSizeOut);
 
 //-----------------------------------------------------------------------------
 // [SECTION] public api implementation
@@ -725,55 +716,51 @@ - (instancetype)initWithBuffer:(id)buffer
         pl_sprintf(tBuffer.tDescription.acDebugName, "%s", pcName);
     }
 
-    if(ptDesc->tMemory == PL_MEMORY_GPU_CPU)
+    MTLResourceOptions tStorageMode = MTLResourceStorageModePrivate;
+    if(ptDesc->tUsage & PL_BUFFER_USAGE_STAGING)
     {
-        tBuffer.tMemoryAllocation = ptDevice->tStagingUnCachedAllocator.allocate(ptDevice->tStagingUnCachedAllocator.ptInst, 0, ptDesc->uByteSize, 0, pcName);
+        tStorageMode = MTLResourceStorageModeShared;
+    }
 
-        plMetalBuffer tMetalBuffer = {
-            .tBuffer = [(id)tBuffer.tMemoryAllocation.uHandle newBufferWithLength:ptDesc->uByteSize options:MTLResourceStorageModeShared offset:0]
-        };
-        tMetalBuffer.tBuffer.label = [NSString stringWithUTF8String:ptDesc->acDebugName];
-        memset(tMetalBuffer.tBuffer.contents, 0, ptDesc->uByteSize);
+    MTLSizeAndAlign tSizeAndAlign = [ptMetalDevice->tDevice heapBufferSizeAndAlignWithLength:ptDesc->uByteSize options:tStorageMode];
+    tBuffer.tMemoryRequirements.ulSize = tSizeAndAlign.size;
+    tBuffer.tMemoryRequirements.ulAlignment = tSizeAndAlign.align;
+    tBuffer.tMemoryRequirements.uMemoryTypeBits = 0;
 
-        tBuffer.tMemoryAllocation.pHostMapped = tMetalBuffer.tBuffer.contents;
-        tBuffer.tMemoryAllocation.ulOffset = 0;
-        tBuffer.tMemoryAllocation.ulSize = ptDesc->uByteSize;
-        tMetalBuffer.tHeap = (id)tBuffer.tMemoryAllocation.uHandle;
-        ptMetalGraphics->sbtBuffersHot[uBufferIndex] = tMetalBuffer;
-    }
-    else if(ptDesc->tMemory == PL_MEMORY_GPU)
-    {
+    plMetalBuffer tMetalBuffer = {
+        0
+    };
+    ptMetalGraphics->sbtBuffersHot[uBufferIndex] = tMetalBuffer;
+    ptGraphics->sbtBuffersCold[uBufferIndex] = tBuffer;
+    return tHandle;
+}
 
-        plDeviceMemoryAllocatorI* ptAllocator = ptDesc->uByteSize > PL_DEVICE_BUDDY_BLOCK_SIZE ? &ptDevice->tLocalDedicatedAllocator : &ptDevice->tLocalBuddyAllocator;
-        tBuffer.tMemoryAllocation = ptAllocator->allocate(ptAllocator->ptInst, MTLStorageModePrivate, ptDesc->uByteSize, 0, pcName);
+static void
+pl_bind_buffer_to_memory(plDevice* ptDevice, plBufferHandle tHandle, const plDeviceMemoryAllocation* ptAllocation)
+{
+    plGraphics* ptGraphics = ptDevice->ptGraphics;
+    plDeviceMetal* ptMetalDevice = (plDeviceMetal*)ptDevice->_pInternalData;
+    plGraphicsMetal* ptMetalGraphics = ptGraphics->_pInternalData;
 
-        plMetalBuffer tMetalBuffer = {
-            .tBuffer = [(id)tBuffer.tMemoryAllocation.uHandle newBufferWithLength:ptDesc->uByteSize options:MTLResourceStorageModePrivate offset:tBuffer.tMemoryAllocation.ulOffset]
-        };
-        tMetalBuffer.tBuffer.label = [NSString stringWithUTF8String:ptDesc->acDebugName];
+    plBuffer* ptBuffer = &ptGraphics->sbtBuffersCold[tHandle.uIndex];
+    ptBuffer->tMemoryAllocation = *ptAllocation;
+    plMetalBuffer* ptMetalBuffer = &ptMetalGraphics->sbtBuffersHot[tHandle.uIndex];
 
-        tMetalBuffer.tHeap = (id)tBuffer.tMemoryAllocation.uHandle;
-        ptMetalGraphics->sbtBuffersHot[uBufferIndex] = tMetalBuffer;
-    }
-    else if(ptDesc->tMemory == PL_MEMORY_CPU)
+    MTLResourceOptions tStorageMode = MTLResourceStorageModeShared;
+    if(ptAllocation->tMemoryMode == PL_MEMORY_GPU)
     {
-        tBuffer.tMemoryAllocation = ptDevice->tStagingCachedAllocator.allocate(ptDevice->tStagingCachedAllocator.ptInst, MTLStorageModePrivate, ptDesc->uByteSize, 0, pcName);
+        tStorageMode = MTLResourceStorageModePrivate;
+    }
 
-        plMetalBuffer tMetalBuffer = {
-            .tBuffer = [(id)tBuffer.tMemoryAllocation.uHandle newBufferWithLength:ptDesc->uByteSize options:MTLResourceStorageModeShared offset:0]
-        };
-        tMetalBuffer.tBuffer.label = [NSString stringWithUTF8String:ptDesc->acDebugName];
-        memset(tMetalBuffer.tBuffer.contents, 0, ptDesc->uByteSize);
+    ptMetalBuffer->tBuffer = [(id)ptAllocation->uHandle newBufferWithLength:ptAllocation->ulSize options:tStorageMode offset:ptAllocation->ulOffset];
+    ptMetalBuffer->tBuffer.label = [NSString stringWithUTF8String:ptBuffer->tDescription.acDebugName];
 
-        tBuffer.tMemoryAllocation.pHostMapped = tMetalBuffer.tBuffer.contents;
-        tBuffer.tMemoryAllocation.ulOffset = 0;
-        tBuffer.tMemoryAllocation.ulSize = ptDesc->uByteSize;
-        tMetalBuffer.tHeap = (id)tBuffer.tMemoryAllocation.uHandle;
-        ptMetalGraphics->sbtBuffersHot[uBufferIndex] = tMetalBuffer;
+    if(ptAllocation->tMemoryMode != PL_MEMORY_GPU)
+    {
+        memset(ptMetalBuffer->tBuffer.contents, 0, ptAllocation->ulSize);
+        ptBuffer->tMemoryAllocation.pHostMapped = ptMetalBuffer->tBuffer.contents;
     }
-
-    ptGraphics->sbtBuffersCold[uBufferIndex] = tBuffer;
-    return tHandle;
+    ptMetalBuffer->tHeap = (id)ptAllocation->uHandle;
 }
 
 static void
@@ -834,7 +821,6 @@ - (instancetype)initWithBuffer:(id)buffer
     ptTextureDescriptor.width = tDesc.tDimensions.x;
     ptTextureDescriptor.height = tDesc.tDimensions.y;
     ptTextureDescriptor.mipmapLevelCount = tDesc.uMips;
-    ptTextureDescriptor.storageMode = MTLStorageModePrivate;
     ptTextureDescriptor.arrayLength = 1;
     ptTextureDescriptor.depth = tDesc.tDimensions.z;
     ptTextureDescriptor.sampleCount = 1;
@@ -846,9 +832,6 @@ - (instancetype)initWithBuffer:(id)buffer
     if(tDesc.tUsage & PL_TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT)
         ptTextureDescriptor.usage |= MTLTextureUsageRenderTarget;
 
-    // if(tDesc.tUsage & PL_TEXTURE_USAGE_TRANSIENT_ATTACHMENT)
-    //     ptTextureDescriptor.storageMode = MTLStorageModeMemoryless;
-
     if(tDesc.tType == PL_TEXTURE_TYPE_2D)
         ptTextureDescriptor.textureType = MTLTextureType2D;
     else if(tDesc.tType == PL_TEXTURE_TYPE_CUBE)
@@ -859,21 +842,41 @@ - (instancetype)initWithBuffer:(id)buffer
     }
 
     MTLSizeAndAlign tSizeAndAlign = [ptMetalDevice->tDevice heapTextureSizeAndAlignWithDescriptor:ptTextureDescriptor];
-    plDeviceMemoryAllocatorI* ptAllocator = tSizeAndAlign.size > PL_DEVICE_BUDDY_BLOCK_SIZE ? &ptGraphics->tDevice.tLocalDedicatedAllocator : &ptGraphics->tDevice.tLocalBuddyAllocator;
-    tTexture.tMemoryAllocation = ptAllocator->allocate(ptAllocator->ptInst, ptTextureDescriptor.storageMode, tSizeAndAlign.size, tSizeAndAlign.align, pcName);
-
+    tTexture.tMemoryRequirements.ulAlignment = tSizeAndAlign.align;
+    tTexture.tMemoryRequirements.ulSize = tSizeAndAlign.size;
+    tTexture.tMemoryRequirements.uMemoryTypeBits = 0;
 
     plMetalTexture tMetalTexture = {
-        .tTexture = [(id)tTexture.tMemoryAllocation.uHandle newTextureWithDescriptor:ptTextureDescriptor offset:tTexture.tMemoryAllocation.ulOffset],
-        .tHeap = (id)tTexture.tMemoryAllocation.uHandle
+        .ptTextureDescriptor = ptTextureDescriptor
     };
-    tMetalTexture.tTexture.label = [NSString stringWithUTF8String:pcName];
-
     ptMetalGraphics->sbtTexturesHot[uTextureIndex] = tMetalTexture;
     ptGraphics->sbtTexturesCold[uTextureIndex] = tTexture;
-    [ptTextureDescriptor release];
     return tHandle;
 }
 
+static void
+pl_bind_texture_to_memory(plDevice* ptDevice, plTextureHandle tHandle, const plDeviceMemoryAllocation* ptAllocation)
+{
+    plGraphics* ptGraphics = ptDevice->ptGraphics;
+    plDeviceMetal* ptMetalDevice = (plDeviceMetal*)ptDevice->_pInternalData;
+    plGraphicsMetal* ptMetalGraphics = ptGraphics->_pInternalData;
+
+    plTexture* ptTexture = &ptGraphics->sbtTexturesCold[tHandle.uIndex];
+    ptTexture->tMemoryAllocation = *ptAllocation;
+    plMetalTexture* ptMetalTexture = &ptMetalGraphics->sbtTexturesHot[tHandle.uIndex];
+
+    MTLStorageMode tStorageMode = MTLStorageModeShared;
+    if(ptAllocation->tMemoryMode == PL_MEMORY_GPU)
+    {
+        tStorageMode = MTLStorageModePrivate;
+    }
+    ptMetalTexture->ptTextureDescriptor.storageMode = tStorageMode;
+
+    ptMetalTexture->tTexture = [(id)ptAllocation->uHandle newTextureWithDescriptor:ptMetalTexture->ptTextureDescriptor offset:ptAllocation->ulOffset];
+    ptMetalTexture->tHeap = (id)ptAllocation->uHandle;
+    ptMetalTexture->tTexture.label = [NSString stringWithUTF8String:ptTexture->tDesc.acDebugName];
+    [ptMetalTexture->ptTextureDescriptor release];
+    ptMetalTexture->ptTextureDescriptor = nil;
+}
 
 static plTextureHandle
 pl_create_texture_view(plDevice* ptDevice, const plTextureViewDesc* ptViewDesc, const char* pcName)
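Because the storage mode is only known once memory is bound, the Metal backend now keeps the MTLTextureDescriptor alive on plMetalTexture: create_texture just records size and alignment from heapTextureSizeAndAlignWithDescriptor, and the actual texture object is created against the heap inside pl_bind_texture_to_memory, after which the descriptor is released. A condensed view of the two-step flow, assuming the handles and allocator already exist (names are illustrative):

    plTextureHandle tColor = gptDevice->create_texture(ptDevice, &tColorDesc, "offscreen"); // descriptor retained, no MTLTexture yet
    plTexture* ptColor = gptDevice->get_texture(ptDevice, tColor);
    plDeviceMemoryAllocation tMemory = ptAllocator->allocate(ptAllocator->ptInst,
        ptColor->tMemoryRequirements.uMemoryTypeBits, ptColor->tMemoryRequirements.ulSize,
        ptColor->tMemoryRequirements.ulAlignment, "offscreen");
    gptDevice->bind_texture_to_memory(ptDevice, tColor, &tMemory); // MTLTexture created here, descriptor released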
ptDevice->ptDynamicAllocator->allocate(ptDevice->ptDynamicAllocator->ptInst, 0, PL_DEVICE_ALLOCATION_BLOCK_SIZE, 0, atNameBuffer); ptDynamicBuffer->tBuffer = [(id)ptDynamicBuffer->tMemory.uHandle newBufferWithLength:PL_DEVICE_ALLOCATION_BLOCK_SIZE options:MTLResourceStorageModeShared offset:0]; ptDynamicBuffer->tBuffer.label = [NSString stringWithUTF8String:"buddy allocator"]; } @@ -1439,6 +1442,46 @@ - (instancetype)initWithBuffer:(id)buffer return tHandle; } +typedef struct _plInternalDeviceAllocatorData +{ + plDevice* ptDevice; + plDeviceMemoryAllocatorI* ptAllocator; +} plInternalDeviceAllocatorData; + +static plDeviceMemoryAllocation +pl_allocate_staging_dynamic(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName) +{ + plInternalDeviceAllocatorData* ptData = (plInternalDeviceAllocatorData*)ptInst; + + plDeviceMemoryAllocation tAllocation = { + .pHostMapped = NULL, + .uHandle = 0, + .ulOffset = 0, + .ulSize = ulSize, + .ptAllocator = ptData->ptAllocator, + .tMemoryMode = PL_MEMORY_GPU_CPU + }; + + + plDeviceAllocationBlock tBlock = pl_allocate_memory(ptData->ptDevice, ulSize, PL_MEMORY_GPU_CPU, uTypeFilter, "Uncached Heap"); + tAllocation.uHandle = tBlock.ulAddress; + tAllocation.pHostMapped = tBlock.pHostMapped; + ptData->ptDevice->ptGraphics->szHostMemoryInUse += ulSize; + return tAllocation; +} + +static void +pl_free_staging_dynamic(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation) +{ + plInternalDeviceAllocatorData* ptData = (plInternalDeviceAllocatorData*)ptInst; + plDeviceAllocationBlock tBlock = {.ulAddress = ptAllocation->uHandle}; + pl_free_memory(ptData->ptDevice, &tBlock); + ptData->ptDevice->ptGraphics->szHostMemoryInUse -= ptAllocation->ulSize; + ptAllocation->uHandle = 0; + ptAllocation->ulSize = 0; + ptAllocation->ulOffset = 0; +} + static void pl_initialize_graphics(plWindow* ptWindow, plGraphics* ptGraphics) { @@ -1512,43 +1555,15 @@ - (instancetype)initWithBuffer:(id)buffer //~~~~~~~~~~~~~~~~~~~~~~~~~~~~device memory allocators~~~~~~~~~~~~~~~~~~~~~~~~~ - // local dedicated - static plDeviceAllocatorData tLocalDedicatedData = {0}; - tLocalDedicatedData.ptDevice = &ptGraphics->tDevice; - ptGraphics->tDevice.tLocalDedicatedAllocator.allocate = pl_allocate_dedicated; - ptGraphics->tDevice.tLocalDedicatedAllocator.free = pl_free_dedicated; - ptGraphics->tDevice.tLocalDedicatedAllocator.blocks = pl_get_allocator_blocks; - ptGraphics->tDevice.tLocalDedicatedAllocator.ranges = pl_get_allocator_ranges; - ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tLocalDedicatedData; - - // local buddy - static plDeviceAllocatorData tLocalBuddyData = {0}; - for(uint32_t i = 0; i < PL_DEVICE_LOCAL_LEVELS; i++) - tLocalBuddyData.auFreeList[i] = UINT32_MAX; - tLocalBuddyData.ptDevice = &ptGraphics->tDevice; - ptGraphics->tDevice.tLocalBuddyAllocator.allocate = pl_allocate_buddy; - ptGraphics->tDevice.tLocalBuddyAllocator.free = pl_free_buddy; - ptGraphics->tDevice.tLocalBuddyAllocator.blocks = pl_get_allocator_blocks; - ptGraphics->tDevice.tLocalBuddyAllocator.ranges = pl_get_allocator_ranges; - ptGraphics->tDevice.tLocalBuddyAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tLocalBuddyData; - - // staging uncached - static plDeviceAllocatorData tStagingUncachedData = {0}; - tStagingUncachedData.ptDevice = &ptGraphics->tDevice; - ptGraphics->tDevice.tStagingUnCachedAllocator.allocate = pl_allocate_staging_uncached; - 
ptGraphics->tDevice.tStagingUnCachedAllocator.free = pl_free_staging_uncached; - ptGraphics->tDevice.tStagingUnCachedAllocator.blocks = pl_get_allocator_blocks; - ptGraphics->tDevice.tStagingUnCachedAllocator.ranges = pl_get_allocator_ranges; - ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tStagingUncachedData; - - // staging cached - static plDeviceAllocatorData tStagingCachedData = {0}; - tStagingCachedData.ptDevice = &ptGraphics->tDevice; - ptGraphics->tDevice.tStagingCachedAllocator.allocate = pl_allocate_staging_uncached; - ptGraphics->tDevice.tStagingCachedAllocator.free = pl_free_staging_uncached; - ptGraphics->tDevice.tStagingCachedAllocator.blocks = pl_get_allocator_blocks; - ptGraphics->tDevice.tStagingCachedAllocator.ranges = pl_get_allocator_ranges; - ptGraphics->tDevice.tStagingCachedAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tStagingCachedData; + static plInternalDeviceAllocatorData tAllocatorData = {0}; + static plDeviceMemoryAllocatorI tAllocator = {0}; + tAllocatorData.ptAllocator = &tAllocator; + tAllocatorData.ptDevice = &ptGraphics->tDevice; + tAllocator.allocate = pl_allocate_staging_dynamic; + tAllocator.free = pl_free_staging_dynamic; + tAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tAllocatorData; + ptGraphics->tDevice.ptDynamicAllocator = &tAllocator; + plDeviceMemoryAllocatorI* ptDynamicAllocator = &tAllocator; MTLHeapDescriptor* ptHeapDescriptor = [MTLHeapDescriptor new]; ptHeapDescriptor.storageMode = MTLStorageModeShared; @@ -1570,7 +1585,7 @@ - (instancetype)initWithBuffer:(id)buffer pl_sb_resize(tFrame.sbtDynamicBuffers, 1); static char atNameBuffer[PL_MAX_NAME_LENGTH] = {0}; pl_sprintf(atNameBuffer, "D-BUF-F%d-0", (int)i); - tFrame.sbtDynamicBuffers[0].tMemory = ptGraphics->tDevice.tStagingUnCachedAllocator.allocate(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, 0, PL_DEVICE_ALLOCATION_BLOCK_SIZE, 0,atNameBuffer); + tFrame.sbtDynamicBuffers[0].tMemory = ptGraphics->tDevice.ptDynamicAllocator->allocate(ptGraphics->tDevice.ptDynamicAllocator->ptInst, 0, PL_DEVICE_ALLOCATION_BLOCK_SIZE, 0,atNameBuffer); tFrame.sbtDynamicBuffers[0].tBuffer = [(id)tFrame.sbtDynamicBuffers[0].tMemory.uHandle newBufferWithLength:PL_DEVICE_ALLOCATION_BLOCK_SIZE options:MTLResourceStorageModeShared offset:0]; tFrame.sbtDynamicBuffers[0].tBuffer.label = [NSString stringWithUTF8String:pl_temp_allocator_sprintf(&tTempAllocator, "Dynamic Buffer: %u, 0", i)]; @@ -1925,21 +1940,11 @@ - (instancetype)initWithBuffer:(id)buffer id tCmdBuffer = (id)ptEncoder->tCommandBuffer._pInternal; id tComputeEncoder = (id)ptEncoder->_pInternal; - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtBuddyHeaps); i++) - { - [tComputeEncoder useHeap:ptMetalDevice->sbtBuddyHeaps[i]]; - } - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtDedicatedHeaps); i++) { [tComputeEncoder useHeap:ptMetalDevice->sbtDedicatedHeaps[i]]; } - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtStagingHeaps); i++) - { - [tComputeEncoder useHeap:ptMetalDevice->sbtStagingHeaps[i]]; - } - for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) { [tComputeEncoder useHeap:ptMetalGraphics->sbFrames[i].tDescriptorHeap]; @@ -1974,21 +1979,11 @@ - (instancetype)initWithBuffer:(id)buffer id tDevice = ptMetalDevice->tDevice; plFrameContext* ptFrame = pl__get_frame_resources(ptGraphics); - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtBuddyHeaps); i++) - { - [tRenderEncoder useHeap:ptMetalDevice->sbtBuddyHeaps[i] stages:MTLRenderStageVertex | 
MTLRenderStageFragment]; - } - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtDedicatedHeaps); i++) { [tRenderEncoder useHeap:ptMetalDevice->sbtDedicatedHeaps[i] stages:MTLRenderStageVertex | MTLRenderStageFragment]; } - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtStagingHeaps); i++) - { - [tRenderEncoder useHeap:ptMetalDevice->sbtStagingHeaps[i] stages:MTLRenderStageVertex | MTLRenderStageFragment]; - } - for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) { [tRenderEncoder useHeap:ptMetalGraphics->sbFrames[i].tDescriptorHeap stages:MTLRenderStageVertex | MTLRenderStageFragment]; @@ -2196,6 +2191,12 @@ - (instancetype)initWithBuffer:(id)buffer pl_end_profile_sample(); } +static void +pl_flush_device(plDevice* ptDevice) +{ + gptThread->sleep_thread(500); +} + static void pl_cleanup(plGraphics* ptGraphics) { @@ -2203,16 +2204,6 @@ - (instancetype)initWithBuffer:(id)buffer plDeviceMetal* ptMetalDevice = (plDeviceMetal*)ptGraphics->tDevice._pInternalData; - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtBuddyHeaps); i++) - { - pl_sb_free(ptMetalDevice->sbtBuddyHeaps); - } - - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtStagingHeaps); i++) - { - pl_sb_free(ptMetalDevice->sbtStagingHeaps); - } - for(uint32_t i = 0; i < pl_sb_size(ptMetalDevice->sbtDedicatedHeaps); i++) { pl_sb_free(ptMetalDevice->sbtDedicatedHeaps); @@ -2870,18 +2861,9 @@ - (instancetype)initWithBuffer:(id)buffer for(uint32_t i = 0; i < pl_sb_size(ptGarbage->sbtMemory); i++) { - if(ptGarbage->sbtMemory[i].ptInst == ptGraphics->tDevice.tLocalBuddyAllocator.ptInst) - ptGraphics->tDevice.tLocalBuddyAllocator.free(ptGraphics->tDevice.tLocalBuddyAllocator.ptInst, &ptGarbage->sbtMemory[i]); - else if(ptGarbage->sbtMemory[i].ptInst == ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst) - ptGraphics->tDevice.tLocalDedicatedAllocator.free(ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst, &ptGarbage->sbtMemory[i]); - else if(ptGarbage->sbtMemory[i].ptInst == ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst) - ptGraphics->tDevice.tStagingUnCachedAllocator.free(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, &ptGarbage->sbtMemory[i]); - else if(ptGarbage->sbtMemory[i].ptInst == ptGraphics->tDevice.tStagingCachedAllocator.ptInst) - ptGraphics->tDevice.tStagingCachedAllocator.free(ptGraphics->tDevice.tStagingCachedAllocator.ptInst, &ptGarbage->sbtMemory[i]); + ptGarbage->sbtMemory[i].ptAllocator->free(ptGarbage->sbtMemory[i].ptAllocator->ptInst, &ptGarbage->sbtMemory[i]); } - plDeviceAllocatorData* ptUnCachedAllocatorData = (plDeviceAllocatorData*)ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst; - pl_sb_reset(ptGarbage->sbtTextures); pl_sb_reset(ptGarbage->sbtShaders); pl_sb_reset(ptGarbage->sbtComputeShaders); @@ -2897,11 +2879,15 @@ - (instancetype)initWithBuffer:(id)buffer // [SECTION] device memory allocators //----------------------------------------------------------------------------- -static plDeviceMemoryAllocation -pl_allocate_dedicated(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName) +static plDeviceAllocationBlock +pl_allocate_memory(plDevice* ptDevice, uint64_t ulSize, plMemoryMode tMemoryMode, uint32_t uTypeFilter, const char* pcName) { - plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst; - plDeviceMetal* ptMetalDevice =ptData->ptDevice->_pInternalData; + plDeviceMetal* ptMetalDevice = ptDevice->_pInternalData; + + if(pcName == NULL) + { + pcName = "unnamed memory block"; + } 
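// NOTE: this generic helper replaces the four allocator-specific allocation
// paths on the Metal side: it creates one placement MTLHeap per block and
// selects both the storage mode and the in-use counter (szHostMemoryInUse vs
// szLocalMemoryInUse) from tMemoryMode below. One interaction worth
// verifying: pl_allocate_staging_dynamic() above adds ulSize to
// szHostMemoryInUse after calling this function (which already adds
// tBlock.ulSize for shared memory), while pl_free_staging_dynamic() hands
// pl_free_memory() a zero-sized block, so the host counter appears to be
// incremented twice but decremented only once per staging allocation.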
plDeviceAllocationBlock tBlock = { .ulAddress = 0, @@ -2909,200 +2895,52 @@ - (instancetype)initWithBuffer:(id)buffer }; MTLHeapDescriptor* ptHeapDescriptor = [MTLHeapDescriptor new]; - ptHeapDescriptor.storageMode = uTypeFilter; - ptHeapDescriptor.size = tBlock.ulSize; - ptHeapDescriptor.type = MTLHeapTypePlacement; + ptHeapDescriptor.size = tBlock.ulSize; + ptHeapDescriptor.type = MTLHeapTypePlacement; ptHeapDescriptor.hazardTrackingMode = MTLHazardTrackingModeUntracked; + if(tMemoryMode == PL_MEMORY_GPU_CPU || tMemoryMode == PL_MEMORY_CPU) + { + ptHeapDescriptor.storageMode = MTLStorageModeShared; + ptDevice->ptGraphics->szHostMemoryInUse += tBlock.ulSize; + } + else if(tMemoryMode == PL_MEMORY_GPU) + { + ptHeapDescriptor.storageMode = MTLStorageModePrivate; + ptDevice->ptGraphics->szLocalMemoryInUse += tBlock.ulSize; + } + id tNewHeap = [ptMetalDevice->tDevice newHeapWithDescriptor:ptHeapDescriptor]; - tNewHeap.label = @"Dedicated Heap"; + tNewHeap.label = [NSString stringWithUTF8String:pcName]; tBlock.ulAddress = (uint64_t)tNewHeap; - ptData->ptDevice->ptGraphics->szLocalMemoryInUse += tBlock.ulSize; - pl_sb_push(ptMetalDevice->sbtDedicatedHeaps, tNewHeap); - plDeviceMemoryAllocation tAllocation = { - .pHostMapped = NULL, - .uHandle = tBlock.ulAddress, - .ulOffset = 0, - .ulSize = ulSize, - .ptInst = ptInst - }; - - uint32_t uBlockIndex = pl_sb_size(ptData->sbtBlocks); - if(pl_sb_size(ptData->sbtFreeBlockIndices) > 0) - uBlockIndex = pl_sb_pop(ptData->sbtFreeBlockIndices); - else - pl_sb_add(ptData->sbtBlocks); - - plDeviceAllocationRange tRange = { - .ulOffset = 0, - .ulTotalSize = ulSize, - .ulUsedSize = ulSize, - .ulBlockIndex = uBlockIndex - }; - pl_sprintf(tRange.acName, "%s", pcName); - - pl_sb_push(ptData->sbtNodes, tRange); - ptData->sbtBlocks[uBlockIndex] = tBlock; + pl_sb_push(ptMetalDevice->sbtDedicatedHeaps, tNewHeap); [ptHeapDescriptor release]; - return tAllocation; + return tBlock; } static void -pl_free_dedicated(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation) +pl_free_memory(plDevice* ptDevice, plDeviceAllocationBlock* ptBlock) { - plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst; - - uint32_t uBlockIndex = 0; - uint32_t uNodeIndex = 0; - for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++) - { - plDeviceAllocationRange* ptNode = &ptData->sbtNodes[i]; - plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex]; - - if(ptBlock->ulAddress == ptAllocation->uHandle) - { - uNodeIndex = i; - uBlockIndex = (uint32_t)ptNode->ulBlockIndex; - ptData->ptDevice->ptGraphics->szLocalMemoryInUse -= ptBlock->ulSize; - ptBlock->ulSize = 0; - break; - } - } - pl_sb_del_swap(ptData->sbtNodes, uNodeIndex); - pl_sb_push(ptData->sbtFreeBlockIndices, uBlockIndex); - - id tHeap = (id)ptAllocation->uHandle; + id tHeap = (id)ptBlock->ulAddress; [tHeap setPurgeableState:MTLPurgeableStateEmpty]; [tHeap release]; tHeap = nil; - ptAllocation->pHostMapped = NULL; - ptAllocation->uHandle = 0; - ptAllocation->ulOffset = 0; - ptAllocation->ulSize = 0; -} - -static plDeviceMemoryAllocation -pl_allocate_buddy(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName) -{ - plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst; - plDeviceMetal* ptMetalDevice =ptData->ptDevice->_pInternalData; - - plDeviceMemoryAllocation tAllocation = pl__allocate_buddy(ptInst, uTypeFilter, ulSize, ulAlignment, pcName, 0); - - if(tAllocation.uHandle == 0) - { - 
plDeviceAllocationBlock* ptBlock = &pl_sb_top(ptData->sbtBlocks); - MTLHeapDescriptor* ptHeapDescriptor = [MTLHeapDescriptor new]; - ptHeapDescriptor.storageMode = uTypeFilter; - ptHeapDescriptor.size = PL_DEVICE_BUDDY_BLOCK_SIZE; - ptHeapDescriptor.type = MTLHeapTypePlacement; - ptHeapDescriptor.hazardTrackingMode = MTLHazardTrackingModeUntracked; - id tNewHeap = [ptMetalDevice->tDevice newHeapWithDescriptor:ptHeapDescriptor]; - tNewHeap.label = @"Buddy Heap"; - ptBlock->ulAddress = (uint64_t)tNewHeap; - tAllocation.uHandle = (uint64_t)ptBlock->ulAddress; - ptData->ptDevice->ptGraphics->szLocalMemoryInUse += ptBlock->ulSize; - pl_sb_push(ptMetalDevice->sbtBuddyHeaps, tNewHeap); - } - - return tAllocation; -} - -static plDeviceMemoryAllocation -pl_allocate_staging_uncached(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName) -{ - plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst; - plDeviceMetal* ptMetalDevice =ptData->ptDevice->_pInternalData; - - plDeviceMemoryAllocation tAllocation = { - .pHostMapped = NULL, - .uHandle = 0, - .ulOffset = 0, - .ulSize = ulSize, - .ptInst = ptInst - }; - - // check for existing block - for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++) - { - plDeviceAllocationRange* ptNode = &ptData->sbtNodes[i]; - plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex]; - if(ptNode->ulUsedSize == 0 && ptNode->ulTotalSize >= ulSize && ptBlock->ulAddress != 0) - { - ptNode->ulUsedSize = ulSize; - pl_sprintf(ptNode->acName, "%s", pcName); - tAllocation.pHostMapped = ptBlock->pHostMapped; - tAllocation.uHandle = ptBlock->ulAddress; - tAllocation.ulOffset = 0; - tAllocation.ulSize = ptBlock->ulSize; - return tAllocation; - } - } - - uint32_t uIndex = UINT32_MAX; - if(pl_sb_size(ptData->sbtFreeBlockIndices) > 0) + if(ptBlock->tMemoryMode == PL_MEMORY_GPU) { - uIndex = pl_sb_pop(ptData->sbtFreeBlockIndices); + ptDevice->ptGraphics->szLocalMemoryInUse -= ptBlock->ulSize; } else { - uIndex = pl_sb_size(ptData->sbtBlocks); - pl_sb_add(ptData->sbtNodes); - pl_sb_add(ptData->sbtBlocks); - } - - plDeviceAllocationBlock tBlock = { - .ulAddress = 0, - .ulSize = pl_maxu((uint32_t)ulSize, PL_DEVICE_ALLOCATION_BLOCK_SIZE) - }; - - plDeviceAllocationRange tRange = { - .ulOffset = 0, - .ulUsedSize = ulSize, - .ulTotalSize = tBlock.ulSize, - .ulBlockIndex = uIndex - }; - pl_sprintf(tRange.acName, "%s", pcName); - - MTLHeapDescriptor* ptHeapDescriptor = [MTLHeapDescriptor new]; - ptHeapDescriptor.storageMode = MTLStorageModeShared; - ptHeapDescriptor.size = tBlock.ulSize; - ptHeapDescriptor.type = MTLHeapTypePlacement; - ptData->ptDevice->ptGraphics->szHostMemoryInUse += tBlock.ulSize; - - id tNewHeap = [ptMetalDevice->tDevice newHeapWithDescriptor:ptHeapDescriptor]; - tNewHeap.label = @"Uncached Heap"; - tBlock.ulAddress = (uint64_t)tNewHeap; - tAllocation.uHandle = tBlock.ulAddress; - - pl_sb_push(ptMetalDevice->sbtStagingHeaps, tNewHeap); - - ptData->sbtNodes[uIndex] = tRange; - ptData->sbtBlocks[uIndex] = tBlock; - return tAllocation; -} - -static void -pl_free_staging_uncached(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation) -{ - plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst; - - for(uint32_t i = 0; i < pl_sb_size(ptData->sbtBlocks); i++) - { - plDeviceAllocationRange* ptRange = &ptData->sbtNodes[i]; - plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptRange->ulBlockIndex]; - - // find block - if(ptBlock->ulAddress == 
ptAllocation->uHandle) - { - ptRange->ulUsedSize = 0; - memset(ptRange->acName, 0, PL_MAX_NAME_LENGTH); - strncpy(ptRange->acName, "not used", PL_MAX_NAME_LENGTH); - break; - } + ptDevice->ptGraphics->szHostMemoryInUse -= ptBlock->ulSize; } + ptBlock->ulAddress = 0; + ptBlock->pHostMapped = NULL; + ptBlock->ulSize = 0; + ptBlock->tMemoryMode = 0; + ptBlock->ulMemoryType = 0; + ptBlock->dLastTimeUsed = 0; } static void @@ -3120,14 +2958,14 @@ - (instancetype)initWithBuffer:(id)buffer plBuffer* ptBuffer = &ptGraphics->sbtBuffersCold[tHandle.uIndex]; - if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalBuddyAllocator.ptInst) - ptGraphics->tDevice.tLocalBuddyAllocator.free(ptGraphics->tDevice.tLocalBuddyAllocator.ptInst, &ptBuffer->tMemoryAllocation); - else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst) - ptGraphics->tDevice.tLocalDedicatedAllocator.free(ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst, &ptBuffer->tMemoryAllocation); - else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst) - ptGraphics->tDevice.tStagingUnCachedAllocator.free(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, &ptBuffer->tMemoryAllocation); - else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingCachedAllocator.ptInst) - ptGraphics->tDevice.tStagingCachedAllocator.free(ptGraphics->tDevice.tStagingCachedAllocator.ptInst, &ptBuffer->tMemoryAllocation); + // if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalBuddyAllocator.ptInst) + // ptGraphics->tDevice.tLocalBuddyAllocator.free(ptGraphics->tDevice.tLocalBuddyAllocator.ptInst, &ptBuffer->tMemoryAllocation); + // else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst) + // ptGraphics->tDevice.tLocalDedicatedAllocator.free(ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst, &ptBuffer->tMemoryAllocation); + // else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst) + // ptGraphics->tDevice.tStagingUnCachedAllocator.free(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, &ptBuffer->tMemoryAllocation); + // else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingCachedAllocator.ptInst) + // ptGraphics->tDevice.tStagingCachedAllocator.free(ptGraphics->tDevice.tStagingCachedAllocator.ptInst, &ptBuffer->tMemoryAllocation); } static void @@ -3146,14 +2984,14 @@ - (instancetype)initWithBuffer:(id)buffer plTexture* ptTexture = &ptGraphics->sbtTexturesCold[tHandle.uIndex]; - if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalBuddyAllocator.ptInst) - ptGraphics->tDevice.tLocalBuddyAllocator.free(ptGraphics->tDevice.tLocalBuddyAllocator.ptInst, &ptTexture->tMemoryAllocation); - else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst) - ptGraphics->tDevice.tLocalDedicatedAllocator.free(ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst, &ptTexture->tMemoryAllocation); - else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst) - ptGraphics->tDevice.tStagingUnCachedAllocator.free(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, &ptTexture->tMemoryAllocation); - else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingCachedAllocator.ptInst) - ptGraphics->tDevice.tStagingCachedAllocator.free(ptGraphics->tDevice.tStagingCachedAllocator.ptInst, &ptTexture->tMemoryAllocation); + // 
if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalBuddyAllocator.ptInst) + // ptGraphics->tDevice.tLocalBuddyAllocator.free(ptGraphics->tDevice.tLocalBuddyAllocator.ptInst, &ptTexture->tMemoryAllocation); + // else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst) + // ptGraphics->tDevice.tLocalDedicatedAllocator.free(ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst, &ptTexture->tMemoryAllocation); + // else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst) + // ptGraphics->tDevice.tStagingUnCachedAllocator.free(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, &ptTexture->tMemoryAllocation); + // else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingCachedAllocator.ptInst) + // ptGraphics->tDevice.tStagingCachedAllocator.free(ptGraphics->tDevice.tStagingCachedAllocator.ptInst, &ptTexture->tMemoryAllocation); } static void @@ -3326,7 +3164,12 @@ - (instancetype)initWithBuffer:(id)buffer .get_buffer = pl__get_buffer, .get_texture = pl__get_texture, .get_bind_group = pl__get_bind_group, - .get_shader = pl__get_shader + .get_shader = pl__get_shader, + .allocate_memory = pl_allocate_memory, + .free_memory = pl_free_memory, + .flush_device = pl_flush_device, + .bind_buffer_to_memory = pl_bind_buffer_to_memory, + .bind_texture_to_memory = pl_bind_texture_to_memory }; return &tApi; } diff --git a/extensions/pl_ref_renderer_ext.c b/extensions/pl_ref_renderer_ext.c index 1039779f..9e4ce701 100644 --- a/extensions/pl_ref_renderer_ext.c +++ b/extensions/pl_ref_renderer_ext.c @@ -32,6 +32,7 @@ Index of this file: #include "pl_resource_ext.h" #include "pl_image_ext.h" #include "pl_stats_ext.h" +#include "pl_gpu_allocators_ext.h" // misc #include "cgltf.h" @@ -193,6 +194,11 @@ typedef struct _plRefRendererData plGraphics tGraphics; + // allocators + plDeviceMemoryAllocatorI* ptLocalDedicatedAllocator; + plDeviceMemoryAllocatorI* ptLocalBuddyAllocator; + plDeviceMemoryAllocatorI* ptStagingUnCachedAllocator; + // misc textures plSamplerHandle tDefaultSampler; plTextureHandle tDummyTexture; @@ -233,16 +239,18 @@ typedef struct _plRefRendererData static plRefRendererData* gptData = NULL; // apis -static const plDataRegistryI* gptDataRegistry = NULL; -static const plResourceI* gptResource = NULL; -static const plEcsI* gptECS = NULL; -static const plFileI* gptFile = NULL; -static const plDeviceI* gptDevice = NULL; -static const plGraphicsI* gptGfx = NULL; -static const plCameraI* gptCamera = NULL; -static const plDrawStreamI* gptStream = NULL; -static const plImageI* gptImage = NULL; -static const plStatsI* gptStats = NULL; +static const plDataRegistryI* gptDataRegistry = NULL; +static const plResourceI* gptResource = NULL; +static const plEcsI* gptECS = NULL; +static const plFileI* gptFile = NULL; +static const plDeviceI* gptDevice = NULL; +static const plGraphicsI* gptGfx = NULL; +static const plCameraI* gptCamera = NULL; +static const plDrawStreamI* gptStream = NULL; +static const plImageI* gptImage = NULL; +static const plStatsI* gptStats = NULL; +static const plGPUAllocatorsI* gptGpuAllocators = NULL; +static const plThreadsI* gptThreads = NULL; //----------------------------------------------------------------------------- // [SECTION] forward declarations @@ -347,22 +355,33 @@ pl_refr_initialize(plWindow* ptWindow) // initialize ecs gptECS->init_component_library(&gptData->tComponentLibrary); + // load allocators + gptData->ptLocalBuddyAllocator = 
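// the renderer now obtains its buddy, dedicated, and staging-uncached
// allocators from the new pl_gpu_allocators extension (PL_API_GPU_ALLOCATORS,
// loaded in pl_load_ext below) instead of reaching into plDevice; the device
// itself retains only the small dynamic allocator used for per-frame data: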
gptGpuAllocators->create_local_buddy_allocator(&ptGraphics->tDevice); + gptData->ptLocalDedicatedAllocator = gptGpuAllocators->create_local_dedicated_allocator(&ptGraphics->tDevice); + gptData->ptStagingUnCachedAllocator = gptGpuAllocators->create_staging_uncached_allocator(&ptGraphics->tDevice); + // initialize graphics ptGraphics->bValidationActive = true; gptGfx->initialize(ptWindow, ptGraphics); gptDataRegistry->set_data("device", &ptGraphics->tDevice); // used by debug extension + + // create main render pass plIO* ptIO = pl_get_io(); // create staging buffer const plBufferDescription tStagingBufferDesc = { - .tMemory = PL_MEMORY_GPU_CPU, - .tUsage = PL_BUFFER_USAGE_UNSPECIFIED, + .tUsage = PL_BUFFER_USAGE_STAGING, .uByteSize = 268435456 }; for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) + { gptData->tStagingBufferHandle[i] = gptDevice->create_buffer(&ptGraphics->tDevice, &tStagingBufferDesc, "staging buffer"); + plBuffer* ptBuffer = gptDevice->get_buffer(&ptGraphics->tDevice, gptData->tStagingBufferHandle[i]); + plDeviceMemoryAllocation tAllocation = gptData->ptStagingUnCachedAllocator->allocate(gptData->ptStagingUnCachedAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "staging buffer"); + gptDevice->bind_buffer_to_memory(&ptGraphics->tDevice, gptData->tStagingBufferHandle[i], &tAllocation); + } plBuffer* ptStagingBuffer = gptDevice->get_buffer(&ptGraphics->tDevice, gptData->tStagingBufferHandle[0]); // create dummy texture @@ -375,6 +394,11 @@ pl_refr_initialize(plWindow* ptWindow) .tUsage = PL_TEXTURE_USAGE_SAMPLED, }; gptData->tDummyTexture = gptDevice->create_texture(&ptGraphics->tDevice, &tTextureDesc, "dummy texture"); + { + plTexture* ptTexture = gptDevice->get_texture(&ptGraphics->tDevice, gptData->tDummyTexture); + plDeviceMemoryAllocation tAllocation = gptData->ptLocalBuddyAllocator->allocate(gptData->ptLocalBuddyAllocator->ptInst, ptTexture->tMemoryRequirements.uMemoryTypeBits, ptTexture->tMemoryRequirements.ulSize, ptTexture->tMemoryRequirements.ulAlignment, "dummy texture"); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, gptData->tDummyTexture, &tAllocation); + } // copy data to dummy texture static float image[] = { @@ -772,6 +796,20 @@ pl_refr_create_view(uint32_t uSceneHandle, plVec2 tDimensions) ptView->tAlbedoTexture[i] = gptDevice->create_texture(&ptGraphics->tDevice, &tTextureDesc2, "albedo texture original"); ptView->tNormalTexture[i] = gptDevice->create_texture(&ptGraphics->tDevice, &tTextureDesc2, "normal texture original"); ptView->tPositionTexture[i] = gptDevice->create_texture(&ptGraphics->tDevice, &tTextureDesc2, "position texture original"); + + plTexture* ptTexture0 = gptDevice->get_texture(&ptGraphics->tDevice, ptView->tTexture[i]); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptTexture0->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation0 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "offscreen texture original"); + plDeviceMemoryAllocation tAllocation1 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "albedo texture original"); + plDeviceMemoryAllocation 
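// NOTE: the allocator is chosen by size — requests larger than
// PL_DEVICE_BUDDY_BLOCK_SIZE get a dedicated heap, everything else goes
// through the buddy allocator — and all four render targets here are
// allocated from ptTexture0's memory requirements, which is only safe while
// the color targets share a single descriptor; if their formats ever
// diverge, each texture should query its own requirements before binding.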
tAllocation2 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "normal texture original"); + plDeviceMemoryAllocation tAllocation3 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "position texture original"); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tTexture[i], &tAllocation0); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tAlbedoTexture[i], &tAllocation1); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tNormalTexture[i], &tAllocation2); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tPositionTexture[i], &tAllocation3); + } const plTextureDesc tDepthTextureDesc = { @@ -784,7 +822,15 @@ pl_refr_create_view(uint32_t uSceneHandle, plVec2 tDimensions) .tInitialUsage = PL_TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT }; for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) + { ptView->tDepthTexture[i] = gptDevice->create_texture(&ptGraphics->tDevice, &tDepthTextureDesc, "offscreen depth texture original"); + plTexture* ptTexture0 = gptDevice->get_texture(&ptGraphics->tDevice, ptView->tDepthTexture[i]); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptTexture0->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation0 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "offscreen depth texture original"); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tDepthTexture[i], &tAllocation0); + } for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) { @@ -874,12 +920,16 @@ pl_refr_create_view(uint32_t uSceneHandle, plVec2 tDimensions) gptGfx->register_3d_drawlist(ptGraphics, &ptView->t3DDrawList); const plBufferDescription atGlobalBuffersDesc = { - .tMemory = PL_MEMORY_GPU_CPU, - .tUsage = PL_BUFFER_USAGE_UNIFORM, - .uByteSize = sizeof(BindGroup_0) + .tUsage = PL_BUFFER_USAGE_UNIFORM | PL_BUFFER_USAGE_STAGING, + .uByteSize = PL_DEVICE_ALLOCATION_BLOCK_SIZE }; for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) + { ptView->atGlobalBuffers[i] = gptDevice->create_buffer(&ptGraphics->tDevice, &atGlobalBuffersDesc, "global buffer"); + plBuffer* ptBuffer = gptDevice->get_buffer(&ptGraphics->tDevice, ptView->atGlobalBuffers[i]); + plDeviceMemoryAllocation tAllocation = gptData->ptStagingUnCachedAllocator->allocate(gptData->ptStagingUnCachedAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "global buffer"); + gptDevice->bind_buffer_to_memory(&ptGraphics->tDevice, ptView->atGlobalBuffers[i], &tAllocation); + } const uint32_t uStartIndex2 = pl_sb_size(ptScene->sbtVertexPosBuffer); const uint32_t uIndexStart2 = pl_sb_size(ptScene->sbuIndexBuffer); @@ -965,6 +1015,19 @@ pl_refr_resize_view(uint32_t uSceneHandle, uint32_t uViewHandle, plVec2 tDimensi ptView->tAlbedoTexture[i] = gptDevice->create_texture(&ptGraphics->tDevice, &tOffscreenTextureDesc2, pl_temp_allocator_sprintf(&tTempAllocator, "albedo texture %u", i)); ptView->tNormalTexture[i] = gptDevice->create_texture(&ptGraphics->tDevice, &tOffscreenTextureDesc2, 
pl_temp_allocator_sprintf(&tTempAllocator, "normal texture %u", i)); ptView->tPositionTexture[i] = gptDevice->create_texture(&ptGraphics->tDevice, &tOffscreenTextureDesc2, pl_temp_allocator_sprintf(&tTempAllocator, "position texture %u", i)); + + plTexture* ptTexture0 = gptDevice->get_texture(&ptGraphics->tDevice, ptView->tTexture[i]); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptTexture0->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation0 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "offscreen texture"); + plDeviceMemoryAllocation tAllocation1 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "albedo texture"); + plDeviceMemoryAllocation tAllocation2 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "normal texture"); + plDeviceMemoryAllocation tAllocation3 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "position texture"); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tTexture[i], &tAllocation0); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tAlbedoTexture[i], &tAllocation1); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tNormalTexture[i], &tAllocation2); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tPositionTexture[i], &tAllocation3); } pl_temp_allocator_free(&tTempAllocator); @@ -980,6 +1043,12 @@ pl_refr_resize_view(uint32_t uSceneHandle, uint32_t uViewHandle, plVec2 tDimensi for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) { ptView->tDepthTexture[i] = gptDevice->create_texture(&ptGraphics->tDevice, &tOffscreenDepthTextureDesc, "offscreen depth texture"); + plTexture* ptTexture0 = gptDevice->get_texture(&ptGraphics->tDevice, ptView->tDepthTexture[i]); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptTexture0->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation0 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "offscreen depth texture"); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptView->tDepthTexture[i], &tAllocation0); } for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) @@ -1052,6 +1121,8 @@ pl_refr_cleanup(void) pl_sb_free(ptScene->sbtSkinData); pl_hm_free(&ptScene->tMaterialHashMap); } + gptDevice->flush_device(&gptData->tGraphics.tDevice); + gptGpuAllocators->cleanup_allocators(&gptData->tGraphics.tDevice); gptGfx->cleanup(&gptData->tGraphics); // must be cleaned up after graphics since 3D drawlist are registered as pointers @@ -1122,23 +1193,30 @@ pl_refr_load_skybox_from_panorama(uint32_t uSceneHandle, const char* pcPath, int plBufferHandle atComputeBuffers[7] = {0}; const uint32_t uPanoramaSize = iPanoramaHeight * iPanoramaWidth * 4 * sizeof(float); const plBufferDescription 
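// NOTE: the compute buffers below are now created at a fixed
// PL_DEVICE_ALLOCATION_BLOCK_SIZE with PL_BUFFER_USAGE_STAGING so they remain
// host-visible after binding; this assumes the panorama (uPanoramaSize) and
// each cube face (uFaceSize) fit inside a single allocation block — an
// assert on that bound would make the assumption explicit.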
tInputBufferDesc = { - .tMemory = PL_MEMORY_GPU_CPU, - .tUsage = PL_BUFFER_USAGE_STORAGE, - .uByteSize = uPanoramaSize + .tUsage = PL_BUFFER_USAGE_STORAGE | PL_BUFFER_USAGE_STAGING, + .uByteSize = PL_DEVICE_ALLOCATION_BLOCK_SIZE }; atComputeBuffers[0] = gptDevice->create_buffer(ptDevice, &tInputBufferDesc, "panorama input"); - plBuffer* ptComputeBuffer = gptDevice->get_buffer(ptDevice, atComputeBuffers[0]); - memcpy(ptComputeBuffer->tMemoryAllocation.pHostMapped, pfPanoramaData, iPanoramaWidth * iPanoramaHeight * 4 * sizeof(float)); + { + plBuffer* ptComputeBuffer = gptDevice->get_buffer(ptDevice, atComputeBuffers[0]); + plDeviceMemoryAllocation tAllocation = gptData->ptStagingUnCachedAllocator->allocate(gptData->ptStagingUnCachedAllocator->ptInst, ptComputeBuffer->tMemoryRequirements.uMemoryTypeBits, ptComputeBuffer->tMemoryRequirements.ulSize, ptComputeBuffer->tMemoryRequirements.ulAlignment, "panorama input"); + gptDevice->bind_buffer_to_memory(&ptGraphics->tDevice, atComputeBuffers[0], &tAllocation); + memcpy(ptComputeBuffer->tMemoryAllocation.pHostMapped, pfPanoramaData, iPanoramaWidth * iPanoramaHeight * 4 * sizeof(float)); + } const size_t uFaceSize = ((size_t)iResolution * (size_t)iResolution) * 4 * sizeof(float); const plBufferDescription tOutputBufferDesc = { - .tMemory = PL_MEMORY_GPU_CPU, .tUsage = PL_BUFFER_USAGE_STORAGE, - .uByteSize = (uint32_t)uFaceSize + .uByteSize = PL_DEVICE_ALLOCATION_BLOCK_SIZE }; for(uint32_t i = 0; i < 6; i++) + { atComputeBuffers[i + 1] = gptDevice->create_buffer(ptDevice, &tOutputBufferDesc, "panorama output"); + plBuffer* ptBuffer = gptDevice->get_buffer(&ptGraphics->tDevice, atComputeBuffers[i + 1]); + plDeviceMemoryAllocation tAllocation = gptData->ptStagingUnCachedAllocator->allocate(gptData->ptStagingUnCachedAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "panorama output"); + gptDevice->bind_buffer_to_memory(&ptGraphics->tDevice, atComputeBuffers[i + 1], &tAllocation); + } plBindGroupLayout tComputeBindGroupLayout = { .uBufferCount = 7, @@ -1206,6 +1284,12 @@ pl_refr_load_skybox_from_panorama(uint32_t uSceneHandle, const char* pcPath, int .tUsage = PL_TEXTURE_USAGE_SAMPLED }; ptScene->tSkyboxTexture = gptDevice->create_texture(ptDevice, &tTextureDesc, "skybox texture"); + plTexture* ptTexture0 = gptDevice->get_texture(&ptGraphics->tDevice, ptScene->tSkyboxTexture); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptTexture0->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation0 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "skybox texture"); + gptDevice->bind_texture_to_memory(&ptGraphics->tDevice, ptScene->tSkyboxTexture, &tAllocation0); plBufferImageCopy atBufferImageCopy[6] = {0}; for(uint32_t i = 0; i < 6; i++) @@ -2089,6 +2173,13 @@ pl__create_texture_helper(plMaterialComponent* ptMaterial, plTextureSlot tSlot, .tUsage = PL_TEXTURE_USAGE_SAMPLED }; tTexture = gptDevice->create_texture(ptDevice, &tTextureDesc, ptMaterial->atTextureMaps[tSlot].acName); + plTexture* ptTexture0 = gptDevice->get_texture(ptDevice, tTexture); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptTexture0->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = 
gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation0 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, ptMaterial->atTextureMaps[tSlot].acName); + gptDevice->bind_texture_to_memory(ptDevice, tTexture, &tAllocation0); + plBufferImageCopy tBufferImageCopy = { .tImageExtent = {texWidth, texHeight, 1}, .uLayerCount = 1 @@ -2115,6 +2206,13 @@ pl__create_texture_helper(plMaterialComponent* ptMaterial, plTextureSlot tSlot, .tUsage = PL_TEXTURE_USAGE_SAMPLED }; tTexture = gptDevice->create_texture(ptDevice, &tTextureDesc, ptMaterial->atTextureMaps[tSlot].acName); + plTexture* ptTexture0 = gptDevice->get_texture(ptDevice, tTexture); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptTexture0->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation0 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, ptMaterial->atTextureMaps[tSlot].acName); + gptDevice->bind_texture_to_memory(ptDevice, tTexture, &tAllocation0); + plBufferImageCopy tBufferImageCopy = { .tImageExtent = {texWidth, texHeight, 1}, .uLayerCount = 1 @@ -2377,7 +2475,15 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) }; for(uint32_t i = 0; i < PL_FRAMES_IN_FLIGHT; i++) + { tSkinData.atDynamicTexture[i] = gptDevice->create_texture(ptDevice, &tTextureDesc, "joint texture"); + plTexture* ptTexture0 = gptDevice->get_texture(ptDevice, tSkinData.atDynamicTexture[i]); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptTexture0->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation0 = ptAllocator->allocate(ptAllocator->ptInst, ptTexture0->tMemoryRequirements.uMemoryTypeBits, ptTexture0->tMemoryRequirements.ulSize, ptTexture0->tMemoryRequirements.ulAlignment, "joint texture"); + gptDevice->bind_texture_to_memory(ptDevice, tSkinData.atDynamicTexture[i], &tAllocation0); + } plBufferImageCopy tBufferImageCopy = { .tImageExtent = {textureWidth, textureWidth, 1}, @@ -2403,12 +2509,19 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) plCommandBuffer tCommandBuffer = gptGfx->begin_command_recording(ptGraphics, NULL); const plBufferDescription tShaderBufferDesc = { - .tMemory = PL_MEMORY_GPU, .tUsage = PL_BUFFER_USAGE_STORAGE, .uByteSize = sizeof(plMaterial) * pl_sb_size(ptScene->sbtMaterialBuffer) }; memcpy(ptStagingBuffer->tMemoryAllocation.pHostMapped, ptScene->sbtMaterialBuffer, sizeof(plMaterial) * pl_sb_size(ptScene->sbtMaterialBuffer)); ptScene->tMaterialDataBuffer = gptDevice->create_buffer(ptDevice, &tShaderBufferDesc, "shader buffer"); + { + plBuffer* ptBuffer = gptDevice->get_buffer(&ptGraphics->tDevice, ptScene->tMaterialDataBuffer); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptBuffer->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation = ptAllocator->allocate(ptAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "shader buffer"); + gptDevice->bind_buffer_to_memory(&ptGraphics->tDevice, 
ptScene->tMaterialDataBuffer , &tAllocation); + } plBlitEncoder tEncoder = gptGfx->begin_blit_pass(ptGraphics, &tCommandBuffer); gptGfx->copy_buffer(&tEncoder, gptData->tStagingBufferHandle[0], ptScene->tMaterialDataBuffer, 0, 0, tShaderBufferDesc.uByteSize); @@ -2418,12 +2531,19 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) ptStagingBuffer = gptDevice->get_buffer(ptDevice, gptData->tStagingBufferHandle[0]); const plBufferDescription tIndexBufferDesc = { - .tMemory = PL_MEMORY_GPU, .tUsage = PL_BUFFER_USAGE_INDEX, .uByteSize = sizeof(uint32_t) * pl_sb_size(ptScene->sbuIndexBuffer) }; memcpy(ptStagingBuffer->tMemoryAllocation.pHostMapped, ptScene->sbuIndexBuffer, sizeof(uint32_t) * pl_sb_size(ptScene->sbuIndexBuffer)); ptScene->tIndexBuffer = gptDevice->create_buffer(ptDevice, &tIndexBufferDesc, "index buffer"); + { + plBuffer* ptBuffer = gptDevice->get_buffer(&ptGraphics->tDevice, ptScene->tIndexBuffer); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptBuffer->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation = ptAllocator->allocate(ptAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "index buffer"); + gptDevice->bind_buffer_to_memory(&ptGraphics->tDevice, ptScene->tIndexBuffer , &tAllocation); + } tCommandBuffer = gptGfx->begin_command_recording(ptGraphics, NULL); tEncoder = gptGfx->begin_blit_pass(ptGraphics, &tCommandBuffer); @@ -2434,12 +2554,19 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) ptStagingBuffer = gptDevice->get_buffer(ptDevice, gptData->tStagingBufferHandle[0]); const plBufferDescription tVertexBufferDesc = { - .tMemory = PL_MEMORY_GPU, .tUsage = PL_BUFFER_USAGE_VERTEX, .uByteSize = sizeof(plVec3) * pl_sb_size(ptScene->sbtVertexPosBuffer) }; memcpy(ptStagingBuffer->tMemoryAllocation.pHostMapped, ptScene->sbtVertexPosBuffer, sizeof(plVec3) * pl_sb_size(ptScene->sbtVertexPosBuffer)); ptScene->tVertexBuffer = gptDevice->create_buffer(ptDevice, &tVertexBufferDesc, "vertex buffer"); + { + plBuffer* ptBuffer = gptDevice->get_buffer(&ptGraphics->tDevice, ptScene->tVertexBuffer); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptBuffer->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation = ptAllocator->allocate(ptAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "vertex buffer"); + gptDevice->bind_buffer_to_memory(&ptGraphics->tDevice, ptScene->tVertexBuffer , &tAllocation); + } tCommandBuffer = gptGfx->begin_command_recording(ptGraphics, NULL); tEncoder = gptGfx->begin_blit_pass(ptGraphics, &tCommandBuffer); @@ -2450,12 +2577,19 @@ pl_refr_finalize_scene(uint32_t uSceneHandle) ptStagingBuffer = gptDevice->get_buffer(ptDevice, gptData->tStagingBufferHandle[0]); const plBufferDescription tStorageBufferDesc = { - .tMemory = PL_MEMORY_GPU, .tUsage = PL_BUFFER_USAGE_STORAGE, .uByteSize = sizeof(plVec4) * pl_sb_size(ptScene->sbtVertexDataBuffer) }; memcpy(ptStagingBuffer->tMemoryAllocation.pHostMapped, ptScene->sbtVertexDataBuffer, sizeof(plVec4) * pl_sb_size(ptScene->sbtVertexDataBuffer)); ptScene->tStorageBuffer = gptDevice->create_buffer(ptDevice, &tStorageBufferDesc, "storage buffer"); + { + plBuffer* ptBuffer = 
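// same pattern as the material, index, and vertex buffers above: with the
// tMemory field removed from plBufferDescription, device-local buffers are
// created, bound to buddy- or dedicated-allocator memory chosen by size, and
// then filled through the staging buffer with a blit-pass copy_buffer():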
gptDevice->get_buffer(&ptGraphics->tDevice, ptScene->tStorageBuffer); + plDeviceMemoryAllocatorI* ptAllocator = gptData->ptLocalBuddyAllocator; + if(ptBuffer->tMemoryRequirements.ulSize > PL_DEVICE_BUDDY_BLOCK_SIZE) + ptAllocator = gptData->ptLocalDedicatedAllocator; + plDeviceMemoryAllocation tAllocation = ptAllocator->allocate(ptAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "storage buffer"); + gptDevice->bind_buffer_to_memory(&ptGraphics->tDevice, ptScene->tStorageBuffer , &tAllocation); + } tCommandBuffer = gptGfx->begin_command_recording(ptGraphics, NULL); tEncoder = gptGfx->begin_blit_pass(ptGraphics, &tCommandBuffer); @@ -3212,15 +3346,17 @@ pl_load_ext(plApiRegistryI* ptApiRegistry, bool bReload) pl_set_context(gptDataRegistry->get_data("ui")); // apis - gptResource = ptApiRegistry->first(PL_API_RESOURCE); - gptECS = ptApiRegistry->first(PL_API_ECS); - gptFile = ptApiRegistry->first(PL_API_FILE); - gptDevice = ptApiRegistry->first(PL_API_DEVICE); - gptGfx = ptApiRegistry->first(PL_API_GRAPHICS); - gptCamera = ptApiRegistry->first(PL_API_CAMERA); - gptStream = ptApiRegistry->first(PL_API_DRAW_STREAM); - gptImage = ptApiRegistry->first(PL_API_IMAGE); - gptStats = ptApiRegistry->first(PL_API_STATS); + gptResource = ptApiRegistry->first(PL_API_RESOURCE); + gptECS = ptApiRegistry->first(PL_API_ECS); + gptFile = ptApiRegistry->first(PL_API_FILE); + gptDevice = ptApiRegistry->first(PL_API_DEVICE); + gptGfx = ptApiRegistry->first(PL_API_GRAPHICS); + gptCamera = ptApiRegistry->first(PL_API_CAMERA); + gptStream = ptApiRegistry->first(PL_API_DRAW_STREAM); + gptImage = ptApiRegistry->first(PL_API_IMAGE); + gptStats = ptApiRegistry->first(PL_API_STATS); + gptGpuAllocators = ptApiRegistry->first(PL_API_GPU_ALLOCATORS); + gptThreads = ptApiRegistry->first(PL_API_THREADS); if(bReload) { diff --git a/extensions/pl_vulkan_ext.c b/extensions/pl_vulkan_ext.c index 5dc2db86..5e59c246 100644 --- a/extensions/pl_vulkan_ext.c +++ b/extensions/pl_vulkan_ext.c @@ -285,6 +285,9 @@ static VkStencilOp pl__vulkan_stencil_op(plStencilOp tSt static VkBlendFactor pl__vulkan_blend_factor(plBlendFactor tFactor); static VkBlendOp pl__vulkan_blend_op(plBlendOp tOp); +static plDeviceAllocationBlock pl_allocate_memory(plDevice* ptDevice, uint64_t ulSize, plMemoryMode tMemoryMode, uint32_t uTypeFilter, const char* pcName); +static void pl_free_memory(plDevice* ptDevice, plDeviceAllocationBlock* ptBlock); + // 3D drawing helpers static pl3DVulkanPipelineEntry* pl__get_3d_pipelines(plGraphics* ptGfx, VkRenderPass tRenderPass, VkSampleCountFlagBits tMSAASampleCount, uint32_t uSubpassIndex, pl3DDrawFlags tFlags); @@ -302,18 +305,6 @@ static uint32_t pl__find_memory_type_(VkPhysicalDeviceMemoryPropert static void pl__garbage_collect(plGraphics* ptGraphics); static void pl__fill_common_render_pass_data(const plRenderPassLayoutDescription* ptDesc, plRenderPassLayout* ptLayout, plRenderPassCommonData* ptDataOut); -// device memory allocators -static plDeviceMemoryAllocation pl_allocate_dedicated(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName); -static void pl_free_dedicated (struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation); - -static plDeviceMemoryAllocation pl_allocate_buddy(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName); - -static 
plDeviceMemoryAllocation pl_allocate_staging_uncached (struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName); -static void pl_free_staging_uncached (struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation); - -static plDeviceMemoryAllocation pl_allocate_staging_cached (struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName); -static void pl_free_staging_cached (struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation); - static VKAPI_ATTR VkBool32 VKAPI_CALL pl__debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT tMsgSeverity, VkDebugUtilsMessageTypeFlagsEXT tMsgType, const VkDebugUtilsMessengerCallbackDataEXT* ptCallbackData, void* pUserData); //----------------------------------------------------------------------------- @@ -358,7 +349,7 @@ pl__submit_3d_drawlist(plDrawList3D* ptDrawlist, plRenderEncoder tEncoder, float { if(ptBufferInfo->tVertexBuffer) { - ptGfx->tDevice.tStagingUnCachedAllocator.free(ptGfx->tDevice.tStagingUnCachedAllocator.ptInst, &ptBufferInfo->tVertexMemory); + ptGfx->tDevice.ptDynamicAllocator->free(ptGfx->tDevice.ptDynamicAllocator->ptInst, &ptBufferInfo->tVertexMemory); pl_sb_push(ptCurrentFrame->sbtRawBuffers, ptBufferInfo->tVertexBuffer); } @@ -377,7 +368,7 @@ pl__submit_3d_drawlist(plDrawList3D* ptDrawlist, plRenderEncoder tEncoder, float char acBuffer[256] = {0}; pl_sprintf(acBuffer, "3D-SOLID_VTX-F%d", (int)ptGfx->uCurrentFrameIndex); pl_set_vulkan_object_name(&ptGfx->tDevice, (uint64_t)ptBufferInfo->tVertexBuffer, VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, acBuffer); - ptBufferInfo->tVertexMemory = ptGfx->tDevice.tStagingUnCachedAllocator.allocate(ptGfx->tDevice.tStagingUnCachedAllocator.ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, tMemoryRequirements.alignment, acBuffer); + ptBufferInfo->tVertexMemory = ptGfx->tDevice.ptDynamicAllocator->allocate(ptGfx->tDevice.ptDynamicAllocator->ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, tMemoryRequirements.alignment, acBuffer); PL_VULKAN(vkBindBufferMemory(ptVulkanDevice->tLogicalDevice, ptBufferInfo->tVertexBuffer, (VkDeviceMemory)ptBufferInfo->tVertexMemory.uHandle, ptBufferInfo->tVertexMemory.ulOffset)); } @@ -397,7 +388,7 @@ pl__submit_3d_drawlist(plDrawList3D* ptDrawlist, plRenderEncoder tEncoder, float { if(ptBufferInfo->tIndexBuffer) { - ptGfx->tDevice.tStagingUnCachedAllocator.free(ptGfx->tDevice.tStagingUnCachedAllocator.ptInst, &ptBufferInfo->tIndexMemory); + ptGfx->tDevice.ptDynamicAllocator->free(ptGfx->tDevice.ptDynamicAllocator->ptInst, &ptBufferInfo->tIndexMemory); pl_sb_push(ptCurrentFrame->sbtRawBuffers, ptBufferInfo->tIndexBuffer); } @@ -415,7 +406,7 @@ pl__submit_3d_drawlist(plDrawList3D* ptDrawlist, plRenderEncoder tEncoder, float char acBuffer[256] = {0}; pl_sprintf(acBuffer, "3D-SOLID_IDX-F%d", (int)ptGfx->uCurrentFrameIndex); pl_set_vulkan_object_name(&ptGfx->tDevice, (uint64_t)ptBufferInfo->tIndexBuffer, VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, acBuffer); - ptBufferInfo->tIndexMemory = ptGfx->tDevice.tStagingUnCachedAllocator.allocate(ptGfx->tDevice.tStagingUnCachedAllocator.ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, tMemoryRequirements.alignment, acBuffer); + ptBufferInfo->tIndexMemory = ptGfx->tDevice.ptDynamicAllocator->allocate(ptGfx->tDevice.ptDynamicAllocator->ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, 
tMemoryRequirements.alignment, acBuffer); PL_VULKAN(vkBindBufferMemory(ptVulkanDevice->tLogicalDevice, ptBufferInfo->tIndexBuffer, (VkDeviceMemory)ptBufferInfo->tIndexMemory.uHandle, ptBufferInfo->tIndexMemory.ulOffset)); } @@ -471,7 +462,7 @@ pl__submit_3d_drawlist(plDrawList3D* ptDrawlist, plRenderEncoder tEncoder, float { if(ptBufferInfo->tVertexBuffer) { - ptGfx->tDevice.tStagingUnCachedAllocator.free(ptGfx->tDevice.tStagingUnCachedAllocator.ptInst, &ptBufferInfo->tVertexMemory); + ptGfx->tDevice.ptDynamicAllocator->free(ptGfx->tDevice.ptDynamicAllocator->ptInst, &ptBufferInfo->tVertexMemory); pl_sb_push(ptCurrentFrame->sbtRawBuffers, ptBufferInfo->tVertexBuffer); } @@ -490,7 +481,7 @@ pl__submit_3d_drawlist(plDrawList3D* ptDrawlist, plRenderEncoder tEncoder, float char acBuffer[256] = {0}; pl_sprintf(acBuffer, "3D-LINE_VTX-F%d", (int)ptGfx->uCurrentFrameIndex); pl_set_vulkan_object_name(&ptGfx->tDevice, (uint64_t)ptBufferInfo->tVertexBuffer, VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, acBuffer); - ptBufferInfo->tVertexMemory = ptGfx->tDevice.tStagingUnCachedAllocator.allocate(ptGfx->tDevice.tStagingUnCachedAllocator.ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, tMemoryRequirements.alignment, acBuffer); + ptBufferInfo->tVertexMemory = ptGfx->tDevice.ptDynamicAllocator->allocate(ptGfx->tDevice.ptDynamicAllocator->ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, tMemoryRequirements.alignment, acBuffer); PL_VULKAN(vkBindBufferMemory(ptVulkanDevice->tLogicalDevice, ptBufferInfo->tVertexBuffer, (VkDeviceMemory)ptBufferInfo->tVertexMemory.uHandle, ptBufferInfo->tVertexMemory.ulOffset)); } @@ -510,7 +501,7 @@ pl__submit_3d_drawlist(plDrawList3D* ptDrawlist, plRenderEncoder tEncoder, float { if(ptBufferInfo->tIndexBuffer) { - ptGfx->tDevice.tStagingUnCachedAllocator.free(ptGfx->tDevice.tStagingUnCachedAllocator.ptInst, &ptBufferInfo->tIndexMemory); + ptGfx->tDevice.ptDynamicAllocator->free(ptGfx->tDevice.ptDynamicAllocator->ptInst, &ptBufferInfo->tIndexMemory); pl_sb_push(ptCurrentFrame->sbtRawBuffers, ptBufferInfo->tIndexBuffer); } @@ -528,7 +519,7 @@ pl__submit_3d_drawlist(plDrawList3D* ptDrawlist, plRenderEncoder tEncoder, float char acBuffer[256] = {0}; pl_sprintf(acBuffer, "3D-LINE_IDX-F%d", (int)ptGfx->uCurrentFrameIndex); pl_set_vulkan_object_name(&ptGfx->tDevice, (uint64_t)ptBufferInfo->tIndexBuffer, VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, acBuffer); - ptBufferInfo->tIndexMemory = ptGfx->tDevice.tStagingUnCachedAllocator.allocate(ptGfx->tDevice.tStagingUnCachedAllocator.ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, tMemoryRequirements.alignment, acBuffer); + ptBufferInfo->tIndexMemory = ptGfx->tDevice.ptDynamicAllocator->allocate(ptGfx->tDevice.ptDynamicAllocator->ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, tMemoryRequirements.alignment, acBuffer); PL_VULKAN(vkBindBufferMemory(ptVulkanDevice->tLogicalDevice, ptBufferInfo->tIndexBuffer, (VkDeviceMemory)ptBufferInfo->tIndexMemory.uHandle, ptBufferInfo->tIndexMemory.ulOffset)); } @@ -691,47 +682,48 @@ pl_create_buffer(plDevice* ptDevice, const plBufferDescription* ptDesc, const ch }; VkBufferUsageFlagBits tBufferUsageFlags = 0; - if(ptDesc->tUsage == PL_BUFFER_USAGE_VERTEX) - tBufferInfo.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - if(ptDesc->tUsage == PL_BUFFER_USAGE_INDEX) - tBufferInfo.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; - if(ptDesc->tUsage == PL_BUFFER_USAGE_STORAGE) - tBufferInfo.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - 
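// NOTE: pl_create_buffer() now treats tUsage as a combinable bitmask — the
// equality tests being removed here are replaced just below with & tests —
// so descriptions such as PL_BUFFER_USAGE_UNIFORM | PL_BUFFER_USAGE_STAGING
// work; each usage also gains VK_BUFFER_USAGE_TRANSFER_DST_BIT, and
// PL_BUFFER_USAGE_STAGING adds VK_BUFFER_USAGE_TRANSFER_SRC_BIT, replacing
// the transfer bits previously derived from the removed tMemory field.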
if(ptDesc->tUsage == PL_BUFFER_USAGE_UNIFORM) - tBufferInfo.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + if(ptDesc->tUsage & PL_BUFFER_USAGE_VERTEX) + tBufferInfo.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + if(ptDesc->tUsage & PL_BUFFER_USAGE_INDEX) + tBufferInfo.usage |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + if(ptDesc->tUsage & PL_BUFFER_USAGE_STORAGE) + tBufferInfo.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + if(ptDesc->tUsage & PL_BUFFER_USAGE_UNIFORM) + tBufferInfo.usage |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + if(ptDesc->tUsage & PL_BUFFER_USAGE_STAGING) + tBufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VkMemoryRequirements tMemRequirements = {0}; - if(ptDesc->tMemory == PL_MEMORY_GPU_CPU) - tBufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; - else if(ptDesc->tMemory == PL_MEMORY_GPU) - tBufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; - else if(ptDesc->tMemory == PL_MEMORY_CPU) - tBufferInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - PL_VULKAN(vkCreateBuffer(ptVulkanDevice->tLogicalDevice, &tBufferInfo, NULL, &tVulkanBuffer.tBuffer)); if(pcName) pl_set_vulkan_object_name(ptDevice, (uint64_t)tVulkanBuffer.tBuffer, VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT, pcName); vkGetBufferMemoryRequirements(ptVulkanDevice->tLogicalDevice, tVulkanBuffer.tBuffer, &tMemRequirements); - if(ptDesc->tMemory == PL_MEMORY_GPU_CPU) - tBuffer.tMemoryAllocation = ptDevice->tStagingUnCachedAllocator.allocate(ptDevice->tStagingUnCachedAllocator.ptInst, tMemRequirements.memoryTypeBits, tMemRequirements.size, tMemRequirements.alignment, tBuffer.tDescription.acDebugName); - else if(ptDesc->tMemory == PL_MEMORY_CPU) - tBuffer.tMemoryAllocation = ptDevice->tStagingCachedAllocator.allocate(ptDevice->tStagingCachedAllocator.ptInst, tMemRequirements.memoryTypeBits, tMemRequirements.size, tMemRequirements.alignment, tBuffer.tDescription.acDebugName); - else - { - plDeviceMemoryAllocatorI* ptAllocator = tMemRequirements.size > PL_DEVICE_BUDDY_BLOCK_SIZE ? 
-        plDeviceMemoryAllocatorI* ptAllocator = tMemRequirements.size > PL_DEVICE_BUDDY_BLOCK_SIZE ? &ptDevice->tLocalDedicatedAllocator : &ptDevice->tLocalBuddyAllocator;
-        tBuffer.tMemoryAllocation = ptAllocator->allocate(ptAllocator->ptInst, tMemRequirements.memoryTypeBits, tMemRequirements.size, tMemRequirements.alignment, tBuffer.tDescription.acDebugName);
-    }
-
-    PL_VULKAN(vkBindBufferMemory(ptVulkanDevice->tLogicalDevice, tVulkanBuffer.tBuffer, (VkDeviceMemory)tBuffer.tMemoryAllocation.uHandle, tBuffer.tMemoryAllocation.ulOffset));
-    tVulkanBuffer.pcData = tBuffer.tMemoryAllocation.pHostMapped;
+    tBuffer.tMemoryRequirements.ulAlignment = tMemRequirements.alignment;
+    tBuffer.tMemoryRequirements.ulSize = tMemRequirements.size;
+    tBuffer.tMemoryRequirements.uMemoryTypeBits = tMemRequirements.memoryTypeBits;

     ptVulkanGraphics->sbtBuffersHot[uBufferIndex] = tVulkanBuffer;
     ptGraphics->sbtBuffersCold[uBufferIndex] = tBuffer;
     return tHandle;
 }

+static void
+pl_bind_buffer_to_memory(plDevice* ptDevice, plBufferHandle tHandle, const plDeviceMemoryAllocation* ptAllocation)
+{
+    plVulkanDevice* ptVulkanDevice = ptDevice->_pInternalData;
+    plVulkanGraphics* ptVulkanGraphics = ptDevice->ptGraphics->_pInternalData;
+    plGraphics* ptGraphics = ptDevice->ptGraphics;
+
+    plBuffer* ptBuffer = &ptGraphics->sbtBuffersCold[tHandle.uIndex];
+    ptBuffer->tMemoryAllocation = *ptAllocation;
+    plVulkanBuffer* ptVulkanBuffer = &ptVulkanGraphics->sbtBuffersHot[tHandle.uIndex];
+
+    PL_VULKAN(vkBindBufferMemory(ptVulkanDevice->tLogicalDevice, ptVulkanBuffer->tBuffer, (VkDeviceMemory)ptAllocation->uHandle, ptAllocation->ulOffset));
+    ptVulkanBuffer->pcData = ptAllocation->pHostMapped;
+}
+
 static plDynamicBinding
 pl_allocate_dynamic_data(plDevice* ptDevice, size_t szSize)
 {
@@ -766,13 +758,15 @@ pl_allocate_dynamic_data(plDevice* ptDevice, size_t szSize)
         ptDynamicBuffer = &ptFrame->sbtDynamicBuffers[ptFrame->uCurrentBufferIndex];

         plBufferDescription tStagingBufferDescription0 = {
-            .tMemory   = PL_MEMORY_GPU_CPU,
-            .tUsage    = PL_BUFFER_USAGE_UNIFORM,
+            .tUsage    = PL_BUFFER_USAGE_UNIFORM | PL_BUFFER_USAGE_STAGING,
             .uByteSize = PL_DEVICE_ALLOCATION_BLOCK_SIZE
         };
         pl_sprintf(tStagingBufferDescription0.acDebugName, "D-BUF-F%d-%d", (int)ptGraphics->uCurrentFrameIndex, (int)ptFrame->uCurrentBufferIndex);

-        plBufferHandle tStagingBuffer0 = pl_create_buffer(&ptGraphics->tDevice, &tStagingBufferDescription0, NULL);
+        plBufferHandle tStagingBuffer0 = pl_create_buffer(&ptGraphics->tDevice, &tStagingBufferDescription0, "dynamic buffer");
+        plBuffer* ptBuffer = &ptGraphics->sbtBuffersCold[tStagingBuffer0.uIndex];
+        plDeviceMemoryAllocation tAllocation = ptDevice->ptDynamicAllocator->allocate(ptDevice->ptDynamicAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "dynamic buffer");
+        pl_bind_buffer_to_memory(ptDevice, tStagingBuffer0, &tAllocation);

         ptDynamicBuffer->uHandle = tStagingBuffer0.uIndex;
         ptDynamicBuffer->tBuffer = ptVulkanGraphics->sbtBuffersHot[tStagingBuffer0.uIndex].tBuffer;
@@ -962,7 +956,11 @@ pl_create_texture(plDevice* ptDevice, const plTextureDesc* ptDesc, const char* p
     plVulkanDevice* ptVulkanDevice = ptDevice->_pInternalData;
     plVulkanGraphics* ptVulkanGraphics = ptGraphics->_pInternalData;

+    if(pcName == NULL)
+        pcName = "unnamed texture";
+
     plTextureDesc tDesc = *ptDesc;
+    strncpy(tDesc.acDebugName, pcName, PL_MAX_NAME_LENGTH);

     if(tDesc.tInitialUsage == PL_TEXTURE_USAGE_UNSPECIFIED)
         tDesc.tInitialUsage = PL_TEXTURE_USAGE_SAMPLED;
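The hunks above split buffer setup into explicit steps: pl_create_buffer now only creates the VkBuffer and records its memory requirements, while pl_bind_buffer_to_memory attaches whatever memory the caller obtained. A minimal usage sketch, assuming a plDevice* ptDevice whose ptDynamicAllocator has already been wired up as shown later in this patch:

    plBufferDescription tDesc = {
        .tUsage    = PL_BUFFER_USAGE_UNIFORM | PL_BUFFER_USAGE_STAGING,
        .uByteSize = 1024
    };
    plBufferHandle tHandle = pl_create_buffer(ptDevice, &tDesc, "example buffer");

    // pl_create_buffer filled in tMemoryRequirements; any plDeviceMemoryAllocatorI can service them
    plBuffer* ptBuffer = &ptDevice->ptGraphics->sbtBuffersCold[tHandle.uIndex];
    plDeviceMemoryAllocatorI* ptAllocator = ptDevice->ptDynamicAllocator;
    plDeviceMemoryAllocation tAllocation = ptAllocator->allocate(ptAllocator->ptInst,
        ptBuffer->tMemoryRequirements.uMemoryTypeBits,
        ptBuffer->tMemoryRequirements.ulSize,
        ptBuffer->tMemoryRequirements.ulAlignment,
        "example buffer");

    // the buffer is unusable until its memory is bound
    pl_bind_buffer_to_memory(ptDevice, tHandle, &tAllocation);

@@ -1043,16 +1041,32 @@ pl_create_texture(plDevice* ptDevice, const plTextureDesc* ptDesc, const char* p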
     // get memory requirements
     VkMemoryRequirements tMemoryRequirements = {0};
     vkGetImageMemoryRequirements(ptVulkanDevice->tLogicalDevice, tVulkanTexture.tImage, &tMemoryRequirements);
+    tTexture.tMemoryRequirements.ulSize = tMemoryRequirements.size;
+    tTexture.tMemoryRequirements.ulAlignment = tMemoryRequirements.alignment;
+    tTexture.tMemoryRequirements.uMemoryTypeBits = tMemoryRequirements.memoryTypeBits;

-    // allocate memory
-    plDeviceMemoryAllocatorI* ptAllocator = tMemoryRequirements.size > PL_DEVICE_BUDDY_BLOCK_SIZE ? &ptDevice->tLocalDedicatedAllocator : &ptDevice->tLocalBuddyAllocator;
-    tTexture.tMemoryAllocation = ptAllocator->allocate(ptAllocator->ptInst, tMemoryRequirements.memoryTypeBits, tMemoryRequirements.size, tMemoryRequirements.alignment, pcName);
+    // store the hot/cold texture state; memory is allocated and bound later via pl_bind_texture_to_memory
+    ptVulkanGraphics->sbtTexturesHot[uTextureIndex] = tVulkanTexture;
+    ptGraphics->sbtTexturesCold[uTextureIndex] = tTexture;
+    return tHandle;
+}

-    PL_VULKAN(vkBindImageMemory(ptVulkanDevice->tLogicalDevice, tVulkanTexture.tImage, (VkDeviceMemory)tTexture.tMemoryAllocation.uHandle, tTexture.tMemoryAllocation.ulOffset));
+static void
+pl_bind_texture_to_memory(plDevice* ptDevice, plTextureHandle tHandle, const plDeviceMemoryAllocation* ptAllocation)
+{
+    plVulkanDevice* ptVulkanDevice = ptDevice->_pInternalData;
+    plVulkanGraphics* ptVulkanGraphics = ptDevice->ptGraphics->_pInternalData;
+    plGraphics* ptGraphics = ptDevice->ptGraphics;

-    VkImageAspectFlags tImageAspectFlags = tDesc.tUsage & PL_TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
+    plTexture* ptTexture = &ptGraphics->sbtTexturesCold[tHandle.uIndex];
+    ptTexture->tMemoryAllocation = *ptAllocation;
+    plVulkanTexture* ptVulkanTexture = &ptVulkanGraphics->sbtTexturesHot[tHandle.uIndex];
+
+    PL_VULKAN(vkBindImageMemory(ptVulkanDevice->tLogicalDevice, ptVulkanTexture->tImage, (VkDeviceMemory)ptAllocation->uHandle, ptAllocation->ulOffset));
+
+    VkImageAspectFlags tImageAspectFlags = ptTexture->tDesc.tUsage & PL_TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;
-    if(pl__format_has_stencil(pl__vulkan_format(tDesc.tFormat)))
+    if(pl__format_has_stencil(pl__vulkan_format(ptTexture->tDesc.tFormat)))
         tImageAspectFlags |= VK_IMAGE_ASPECT_STENCIL_BIT;

     VkCommandBuffer tCommandBuffer = {0};
@@ -1074,13 +1088,13 @@ pl_create_texture(plDevice* ptDevice, const plTextureDesc* ptDesc, const char* p

     VkImageSubresourceRange tRange = {
         .baseMipLevel   = 0,
-        .levelCount     = tDesc.uMips,
+        .levelCount     = ptTexture->tDesc.uMips,
         .baseArrayLayer = 0,
-        .layerCount     = tDesc.uLayers,
+        .layerCount     = ptTexture->tDesc.uLayers,
         .aspectMask     = tImageAspectFlags
     };

-    pl__transition_image_layout(tCommandBuffer, tVulkanTexture.tImage, VK_IMAGE_LAYOUT_UNDEFINED, pl__vulkan_layout(tDesc.tInitialUsage), tRange, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
+    pl__transition_image_layout(tCommandBuffer, ptVulkanTexture->tImage, VK_IMAGE_LAYOUT_UNDEFINED, pl__vulkan_layout(ptTexture->tDesc.tInitialUsage), tRange, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);

     PL_VULKAN(vkEndCommandBuffer(tCommandBuffer));
     const VkSubmitInfo tSubmitInfo = {
@@ -1095,26 +1109,29 @@ pl_create_texture(plDevice* ptDevice, const plTextureDesc* ptDesc, const char* p

     //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~create view~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+    VkImageViewType tImageViewType = 0;
+    if(ptTexture->tDesc.tType == PL_TEXTURE_TYPE_CUBE)
+        tImageViewType = VK_IMAGE_VIEW_TYPE_CUBE;
+    else if(ptTexture->tDesc.tType == PL_TEXTURE_TYPE_2D)
+        tImageViewType = VK_IMAGE_VIEW_TYPE_2D;
+    else
+    {
+        PL_ASSERT(false && "unsupported texture type");
+    }
+    PL_ASSERT((ptTexture->tDesc.uLayers == 1 || ptTexture->tDesc.uLayers == 6) && "unsupported layer count");
+
     VkImageViewCreateInfo tViewInfo = {
         .sType                           = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-        .image                           = tVulkanTexture.tImage,
+        .image                           = ptVulkanTexture->tImage,
         .viewType                        = tImageViewType,
-        .format                          = pl__vulkan_format(tDesc.tFormat),
-        .subresourceRange.baseMipLevel   = tTexture.tView.uBaseMip,
-        .subresourceRange.levelCount     = tDesc.uMips,
-        .subresourceRange.baseArrayLayer = tTexture.tView.uBaseLayer,
-        .subresourceRange.layerCount     = tTexture.tView.uLayerCount,
-        .subresourceRange.aspectMask     = tDesc.tUsage & PL_TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT,
-    };
-    PL_VULKAN(vkCreateImageView(ptVulkanDevice->tLogicalDevice, &tViewInfo, NULL, &tVulkanTexture.tImageView));
-
-    if(pcName)
-        pl_set_vulkan_object_name(ptDevice, (uint64_t)tVulkanTexture.tImage, VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT, pcName);
-
-    // upload data
-    ptVulkanGraphics->sbtTexturesHot[uTextureIndex] = tVulkanTexture;
-    ptGraphics->sbtTexturesCold[uTextureIndex] = tTexture;
-    return tHandle;
+        .format                          = pl__vulkan_format(ptTexture->tDesc.tFormat),
+        .subresourceRange.baseMipLevel   = ptTexture->tView.uBaseMip,
+        .subresourceRange.levelCount     = ptTexture->tDesc.uMips,
+        .subresourceRange.baseArrayLayer = ptTexture->tView.uBaseLayer,
+        .subresourceRange.layerCount     = ptTexture->tView.uLayerCount,
+        .subresourceRange.aspectMask     = ptTexture->tDesc.tUsage & PL_TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT,
+    };
+    PL_VULKAN(vkCreateImageView(ptVulkanDevice->tLogicalDevice, &tViewInfo, NULL, &ptVulkanTexture->tImageView));
 }

 static plTextureHandle
@@ -2889,6 +2906,46 @@ pl_draw_list(plGraphics* ptGraphics, plRenderEncoder tEncoder, uint32_t uListCou
     }
 }

+typedef struct _plInternalDeviceAllocatorData
+{
+    plDevice* ptDevice;
+    plDeviceMemoryAllocatorI* ptAllocator;
+} plInternalDeviceAllocatorData;
+
+static plDeviceMemoryAllocation
+pl_allocate_staging_dynamic(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
+{
+    plInternalDeviceAllocatorData* ptData = (plInternalDeviceAllocatorData*)ptInst;
+
+    plDeviceMemoryAllocation tAllocation = {
+        .pHostMapped = NULL,
+        .uHandle     = 0,
+        .ulOffset    = 0,
+        .ulSize      = ulSize,
+        .ptAllocator = ptData->ptAllocator,
+        .tMemoryMode = PL_MEMORY_GPU_CPU
+    };
+
+    // pl_allocate_memory already adds the block to szHostMemoryInUse, so it is not counted again here
+    plDeviceAllocationBlock tBlock = pl_allocate_memory(ptData->ptDevice, ulSize, PL_MEMORY_GPU_CPU, uTypeFilter, "Uncached Heap");
+    tAllocation.uHandle = tBlock.ulAddress;
+    tAllocation.pHostMapped = tBlock.pHostMapped;
+    return tAllocation;
+}
+
+static void
+pl_free_staging_dynamic(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
+{
+    plInternalDeviceAllocatorData* ptData = (plInternalDeviceAllocatorData*)ptInst;
+    plDeviceAllocationBlock tBlock = {
+        .ulAddress   = ptAllocation->uHandle,
+        .ulSize      = ptAllocation->ulSize,
+        .tMemoryMode = PL_MEMORY_GPU_CPU
+    };
+    pl_free_memory(ptData->ptDevice, &tBlock); // also subtracts the block from szHostMemoryInUse
+    ptAllocation->uHandle = 0;
+    ptAllocation->ulSize = 0;
+    ptAllocation->ulOffset = 0;
+}
+
 static void
 pl_initialize_graphics(plWindow* ptWindow, plGraphics* ptGraphics)
 {
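The pair above is the whole plDeviceMemoryAllocatorI contract: allocate returns a plDeviceMemoryAllocation that records its owning interface in ptAllocator, and free takes that allocation back. A hedged sketch of another allocator built on the same pl_allocate_memory/pl_free_memory primitives, with hypothetical names and a one-device-local-block-per-allocation strategy:

    static plDeviceMemoryAllocation
    my_allocate_dedicated(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
    {
        plInternalDeviceAllocatorData* ptData = (plInternalDeviceAllocatorData*)ptInst;

        // one device-local block per allocation; pl_allocate_memory updates szLocalMemoryInUse
        plDeviceAllocationBlock tBlock = pl_allocate_memory(ptData->ptDevice, ulSize, PL_MEMORY_GPU, uTypeFilter, pcName);

        plDeviceMemoryAllocation tAllocation = {
            .uHandle     = tBlock.ulAddress,
            .ulOffset    = 0,
            .ulSize      = ulSize,
            .ptAllocator = ptData->ptAllocator, // lets destroy/garbage-collect paths free without dispatch chains
            .tMemoryMode = PL_MEMORY_GPU
        };
        return tAllocation;
    }

    static void
    my_free_dedicated(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
    {
        plInternalDeviceAllocatorData* ptData = (plInternalDeviceAllocatorData*)ptInst;
        plDeviceAllocationBlock tBlock = {
            .ulAddress   = ptAllocation->uHandle,
            .ulSize      = ptAllocation->ulSize,
            .tMemoryMode = PL_MEMORY_GPU
        };
        pl_free_memory(ptData->ptDevice, &tBlock);
        ptAllocation->uHandle = 0;
        ptAllocation->ulSize  = 0;
    }

@@ -3272,46 +3329,6 @@ pl_initialize_graphics(plWindow* ptWindow, plGraphics* ptGraphics)
         ptVulkanDevice->vkCmdDebugMarkerInsert = (PFN_vkCmdDebugMarkerInsertEXT)vkGetDeviceProcAddr(ptVulkanDevice->tLogicalDevice, "vkCmdDebugMarkerInsertEXT");
     }

-    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~device memory allocators~~~~~~~~~~~~~~~~~~~~~~~~~
-
-    // local dedicated
-    static plDeviceAllocatorData tLocalDedicatedData = {0};
-    tLocalDedicatedData.ptDevice = &ptGraphics->tDevice;
-    ptGraphics->tDevice.tLocalDedicatedAllocator.allocate = pl_allocate_dedicated;
-    ptGraphics->tDevice.tLocalDedicatedAllocator.free = pl_free_dedicated;
-    ptGraphics->tDevice.tLocalDedicatedAllocator.blocks = pl_get_allocator_blocks;
-    ptGraphics->tDevice.tLocalDedicatedAllocator.ranges = pl_get_allocator_ranges;
-    ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tLocalDedicatedData;
-
-    // local buddy
-    static plDeviceAllocatorData tLocalBuddyData = {0};
-    for(uint32_t i = 0; i < PL_DEVICE_LOCAL_LEVELS; i++)
-        tLocalBuddyData.auFreeList[i] = UINT32_MAX;
-    tLocalBuddyData.ptDevice = &ptGraphics->tDevice;
-    ptGraphics->tDevice.tLocalBuddyAllocator.allocate = pl_allocate_buddy;
-    ptGraphics->tDevice.tLocalBuddyAllocator.free = pl_free_buddy;
-    ptGraphics->tDevice.tLocalBuddyAllocator.blocks = pl_get_allocator_blocks;
-    ptGraphics->tDevice.tLocalBuddyAllocator.ranges = pl_get_allocator_ranges;
-    ptGraphics->tDevice.tLocalBuddyAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tLocalBuddyData;
-
-    // staging uncached
-    static plDeviceAllocatorData tStagingUncachedData = {0};
-    tStagingUncachedData.ptDevice = &ptGraphics->tDevice;
-    ptGraphics->tDevice.tStagingUnCachedAllocator.allocate = pl_allocate_staging_uncached;
-    ptGraphics->tDevice.tStagingUnCachedAllocator.free = pl_free_staging_uncached;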
-    ptGraphics->tDevice.tStagingUnCachedAllocator.blocks = pl_get_allocator_blocks;
-    ptGraphics->tDevice.tStagingUnCachedAllocator.ranges = pl_get_allocator_ranges;
-    ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tStagingUncachedData;
-
-    // staging cached
-    static plDeviceAllocatorData tStagingCachedData = {0};
-    tStagingCachedData.ptDevice = &ptGraphics->tDevice;
-    ptGraphics->tDevice.tStagingCachedAllocator.allocate = pl_allocate_staging_cached;
-    ptGraphics->tDevice.tStagingCachedAllocator.free = pl_free_staging_cached;
-    ptGraphics->tDevice.tStagingCachedAllocator.blocks = pl_get_allocator_blocks;
-    ptGraphics->tDevice.tStagingCachedAllocator.ranges = pl_get_allocator_ranges;
-    ptGraphics->tDevice.tStagingCachedAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tStagingCachedData;
-
     //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~command pool~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

     const VkCommandPoolCreateInfo tCommandPoolInfo = {
@@ -3348,6 +3365,16 @@ pl_initialize_graphics(plWindow* ptWindow, plGraphics* ptGraphics)

     //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~frame resources~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+    static plInternalDeviceAllocatorData tAllocatorData = {0};
+    static plDeviceMemoryAllocatorI tAllocator = {0};
+    tAllocatorData.ptAllocator = &tAllocator;
+    tAllocatorData.ptDevice = &ptGraphics->tDevice;
+    tAllocator.allocate = pl_allocate_staging_dynamic;
+    tAllocator.free = pl_free_staging_dynamic;
+    tAllocator.ptInst = (struct plDeviceMemoryAllocatorO*)&tAllocatorData;
+    ptGraphics->tDevice.ptDynamicAllocator = &tAllocator;
+    plDeviceMemoryAllocatorI* ptDynamicAllocator = &tAllocator;
+
     // dynamic buffer stuff
     VkDescriptorSetLayoutBinding tBinding = {
         .binding = 0,
@@ -3392,13 +3419,15 @@ pl_initialize_graphics(plWindow* ptWindow, plGraphics* ptGraphics)
         // dynamic buffer stuff
         pl_sb_resize(tFrame.sbtDynamicBuffers, 1);
         plBufferDescription tStagingBufferDescription0 = {
-            .tMemory   = PL_MEMORY_GPU_CPU,
-            .tUsage    = PL_BUFFER_USAGE_UNIFORM,
+            .tUsage    = PL_BUFFER_USAGE_UNIFORM | PL_BUFFER_USAGE_STAGING,
             .uByteSize = PL_DEVICE_ALLOCATION_BLOCK_SIZE
         };
         pl_sprintf(tStagingBufferDescription0.acDebugName, "D-BUF-F%d-0", (int)i);

-        plBufferHandle tStagingBuffer0 = pl_create_buffer(&ptGraphics->tDevice, &tStagingBufferDescription0, NULL);
+        plBufferHandle tStagingBuffer0 = pl_create_buffer(&ptGraphics->tDevice, &tStagingBufferDescription0, "dynamic buffer 0");
+        plBuffer* ptBuffer = &ptGraphics->sbtBuffersCold[tStagingBuffer0.uIndex];
+        plDeviceMemoryAllocation tAllocation = ptDynamicAllocator->allocate(ptDynamicAllocator->ptInst, ptBuffer->tMemoryRequirements.uMemoryTypeBits, ptBuffer->tMemoryRequirements.ulSize, ptBuffer->tMemoryRequirements.ulAlignment, "dynamic buffer");
+        pl_bind_buffer_to_memory(&ptGraphics->tDevice, tStagingBuffer0, &tAllocation);

         tFrame.uCurrentBufferIndex = UINT32_MAX;
         tFrame.sbtDynamicBuffers[0].uHandle = tStagingBuffer0.uIndex;
@@ -3795,6 +3824,13 @@ pl_resize(plGraphics* ptGraphics)
     pl_end_profile_sample();
 }

+static void
+pl_flush_device(plDevice* ptDevice)
+{
+    plVulkanDevice* ptVulkanDevice = ptDevice->_pInternalData;
+    vkDeviceWaitIdle(ptVulkanDevice->tLogicalDevice);
+}
+
 static void
 pl_shutdown(plGraphics* ptGraphics)
 {
@@ -3949,35 +3985,6 @@ pl_shutdown(plGraphics* ptGraphics)
         pl_sb_free(ptFrame->sbtReadyCommandBuffers);
     }

-    plDeviceAllocatorData* ptData0 = (plDeviceAllocatorData*)ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst;
-
-    for(uint32_t i = 0; i < pl_sb_size(ptData0->sbtBlocks); i++)
-    {
-        if(ptData0->sbtBlocks[i].ulAddress)
-            vkFreeMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)ptData0->sbtBlocks[i].ulAddress, NULL);
-    }
-
-    plDeviceAllocatorData* ptData1 = (plDeviceAllocatorData*)ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst;
-    for(uint32_t i = 0; i < pl_sb_size(ptData1->sbtBlocks); i++)
-    {
-        if((VkDeviceMemory)ptData1->sbtBlocks[i].ulAddress)
-            vkFreeMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)ptData1->sbtBlocks[i].ulAddress, NULL);
-    }
-
-    plDeviceAllocatorData* ptData2 = (plDeviceAllocatorData*)ptGraphics->tDevice.tLocalBuddyAllocator.ptInst;
-    for(uint32_t i = 0; i < pl_sb_size(ptData2->sbtBlocks); i++)
-    {
-        if((VkDeviceMemory)ptData2->sbtBlocks[i].ulAddress)
-            vkFreeMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)ptData2->sbtBlocks[i].ulAddress, NULL);
-    }
-
-    plDeviceAllocatorData* ptData3 = (plDeviceAllocatorData*)ptGraphics->tDevice.tStagingCachedAllocator.ptInst;
-    for(uint32_t i = 0; i < pl_sb_size(ptData3->sbtBlocks); i++)
-    {
-        if((VkDeviceMemory)ptData3->sbtBlocks[i].ulAddress)
-            vkFreeMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)ptData3->sbtBlocks[i].ulAddress, NULL);
-    }
-
     for(uint32_t i = 0; i < pl_sb_size(ptVulkanGfx->sbtRenderPassesHot); i++)
     {
         if(ptVulkanGfx->sbtRenderPassesHot[i].tRenderPass)
@@ -5174,154 +5181,20 @@ pl__pilotlight_format(VkFormat tFormat)
 // [SECTION] device memory allocators
 //-----------------------------------------------------------------------------

-static plDeviceMemoryAllocation
-pl_allocate_dedicated(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-    plVulkanDevice* ptVulkanDevice = ptData->ptDevice->_pInternalData;
-
-    uint32_t uMemoryType = 0u;
-    bool bFound = false;
-    for (uint32_t i = 0; i < ptVulkanDevice->tMemProps.memoryTypeCount; i++)
-    {
-        if ((uTypeFilter & (1 << i)) && (ptVulkanDevice->tMemProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
-        {
-            uMemoryType = i;
-            bFound = true;
-            break;
-        }
-    }
-    PL_ASSERT(bFound);
-
-    const VkMemoryAllocateInfo tAllocInfo = {
-        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .allocationSize  = ulSize,
-        .memoryTypeIndex = uMemoryType
-    };
-
-    plDeviceMemoryAllocation tAllocation = {
-        .pHostMapped = NULL,
-        .uHandle     = 0,
-        .ulOffset    = 0,
-        .ulSize      = tAllocInfo.allocationSize,
-        .ptInst      = ptInst
-    };
-    VkResult tResult = vkAllocateMemory(ptVulkanDevice->tLogicalDevice, &tAllocInfo, NULL, (VkDeviceMemory*)&tAllocation.uHandle);
-    PL_VULKAN(tResult);
-
-    ptData->ptDevice->ptGraphics->szLocalMemoryInUse += tAllocInfo.allocationSize;
-
-    plDeviceAllocationBlock tBlock = {
-        .ulAddress    = tAllocation.uHandle,
-        .ulSize       = tAllocInfo.allocationSize,
-        .ulMemoryType = (uint64_t)uMemoryType
-    };
-
-    uint32_t uBlockIndex = pl_sb_size(ptData->sbtBlocks);
-    if(pl_sb_size(ptData->sbtFreeBlockIndices) > 0)
-        uBlockIndex = pl_sb_pop(ptData->sbtFreeBlockIndices);
-    else
-        pl_sb_add(ptData->sbtBlocks);
-
-    plDeviceAllocationRange tRange = {
-        .ulOffset     = 0,
-        .ulTotalSize  = tAllocInfo.allocationSize,
-        .ulUsedSize   = ulSize,
-        .ulBlockIndex = uBlockIndex
-    };
-    pl_sprintf(tRange.acName, "%s", pcName);
-
-    if(pcName)
-        pl_set_vulkan_object_name(ptData->ptDevice, tAllocation.uHandle, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, pcName);
-
-    pl_sb_push(ptData->sbtNodes, tRange);
-    ptData->sbtBlocks[uBlockIndex] = tBlock;
-    return tAllocation;
-}
-
-static void
-pl_free_dedicated(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-    plVulkanDevice* ptVulkanDevice = ptData->ptDevice->_pInternalData;
-
-    uint32_t uBlockIndex = 0;
-    uint32_t uNodeIndex = 0;
-    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
-    {
-        plDeviceAllocationRange* ptNode = &ptData->sbtNodes[i];
-        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex];
-
-        if(ptBlock->ulAddress == ptAllocation->uHandle)
-        {
-            uNodeIndex = i;
-            uBlockIndex = (uint32_t)ptNode->ulBlockIndex;
-            ptBlock->ulSize = 0;
-            ptBlock->ulAddress = 0;
-            ptData->ptDevice->ptGraphics->szLocalMemoryInUse -= ptBlock->ulSize;
-            break;
-        }
-    }
-    pl_sb_del_swap(ptData->sbtNodes, uNodeIndex);
-    pl_sb_push(ptData->sbtFreeBlockIndices, uBlockIndex);
-    // pl_sb_del_swap(ptData->sbtBlocks, uBlockIndex);
-
-    vkFreeMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)ptAllocation->uHandle, NULL);
-    ptAllocation->pHostMapped = NULL;
-    ptAllocation->uHandle = 0;
-    ptAllocation->ulOffset = 0;
-    ptAllocation->ulSize = 0;
-}
-
-static plDeviceMemoryAllocation
-pl_allocate_buddy(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-    plVulkanDevice* ptVulkanDevice = ptData->ptDevice->_pInternalData;
-
-    // find what level we need
-    uint32_t uMemoryType = 0u;
-    bool bFound = false;
-    for (uint32_t i = 0; i < ptVulkanDevice->tMemProps.memoryTypeCount; i++)
-    {
-        if ((uTypeFilter & (1 << i)) && (ptVulkanDevice->tMemProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
-        {
-            uMemoryType = i;
-            bFound = true;
-            break;
-        }
-    }
-    PL_ASSERT(bFound);
-
-    plDeviceMemoryAllocation tAllocation = pl__allocate_buddy(ptInst, uTypeFilter, ulSize, ulAlignment, pcName, uMemoryType);
-
-    if(tAllocation.uHandle == 0)
-    {
-        plDeviceAllocationBlock* ptBlock = &pl_sb_top(ptData->sbtBlocks);
-
-        const VkMemoryAllocateInfo tAllocInfo = {
-            .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-            .allocationSize  = PL_DEVICE_BUDDY_BLOCK_SIZE,
-            .memoryTypeIndex = uMemoryType
-        };
-        VkResult tResult = vkAllocateMemory(ptVulkanDevice->tLogicalDevice, &tAllocInfo, NULL, (VkDeviceMemory*)&ptBlock->ulAddress);
-        PL_VULKAN(tResult);
-        ptData->ptDevice->ptGraphics->szLocalMemoryInUse += PL_DEVICE_BUDDY_BLOCK_SIZE;
-        tAllocation.uHandle = (uint64_t)ptBlock->ulAddress;
-    }
-
-    return tAllocation;
-}
-
-static plDeviceMemoryAllocation
-pl_allocate_staging_uncached(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
+static plDeviceAllocationBlock
+pl_allocate_memory(plDevice* ptDevice, uint64_t ulSize, plMemoryMode tMemoryMode, uint32_t uTypeFilter, const char* pcName)
 {
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-    plVulkanDevice* ptVulkanDevice = ptData->ptDevice->_pInternalData;
+    plVulkanDevice* ptVulkanDevice = ptDevice->_pInternalData;

     uint32_t uMemoryType = 0u;
     bool bFound = false;
-    const VkMemoryPropertyFlags tProperties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    VkMemoryPropertyFlags tProperties = 0;
+    if(tMemoryMode == PL_MEMORY_GPU_CPU)
+        tProperties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    else if(tMemoryMode == PL_MEMORY_GPU)
+        tProperties = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+    else if(tMemoryMode == PL_MEMORY_CPU)
+        tProperties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
     for (uint32_t i = 0; i < ptVulkanDevice->tMemProps.memoryTypeCount; i++)
     {
         if ((uTypeFilter & (1 << i)) && (ptVulkanDevice->tMemProps.memoryTypes[i].propertyFlags & tProperties) == tProperties)
@@ -5333,208 +5206,63 @@ pl_allocate_staging_uncached(struct plDeviceMemoryAllocatorO* ptInst, uint32_t u
     }
     PL_ASSERT(bFound);

-    plDeviceMemoryAllocation tAllocation = {
-        .pHostMapped = NULL,
-        .uHandle     = 0,
-        .ulOffset    = 0,
-        .ulSize      = ulSize,
-        .ptInst      = ptInst
-    };
-
-    // check for existing block
-    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
-    {
-        plDeviceAllocationRange* ptNode = &ptData->sbtNodes[i];
-        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex];
-        if(ptNode->ulUsedSize == 0 && ptNode->ulTotalSize >= ulSize && ptBlock->ulMemoryType == (uint64_t)uMemoryType && ptBlock->ulAddress != 0)
-        {
-            ptNode->ulUsedSize = ulSize;
-            pl_sprintf(ptNode->acName, "%s", pcName);
-            tAllocation.pHostMapped = ptBlock->pHostMapped;
-            tAllocation.uHandle = ptBlock->ulAddress;
-            tAllocation.ulOffset = 0;
-            tAllocation.ulSize = ptBlock->ulSize;
-            if(pcName)
-                pl_set_vulkan_object_name(ptData->ptDevice, tAllocation.uHandle, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, pcName);
-            return tAllocation;
-        }
-    }
-
-    uint32_t uIndex = UINT32_MAX;
-    if(pl_sb_size(ptData->sbtFreeBlockIndices) > 0)
+    if(pcName == NULL)
     {
-        uIndex = pl_sb_pop(ptData->sbtFreeBlockIndices);
-    }
-    else
-    {
-        uIndex = pl_sb_size(ptData->sbtBlocks);
-        pl_sb_add(ptData->sbtNodes);
-        pl_sb_add(ptData->sbtBlocks);
+        pcName = "unnamed memory block";
     }

-    // block not found, create new block
     plDeviceAllocationBlock tBlock = {
-        .ulAddress = 0,
-        .ulSize = pl_maxu((uint32_t)ulSize, PL_DEVICE_ALLOCATION_BLOCK_SIZE),
-        .ulMemoryType = uMemoryType
-    };
-
-    plDeviceAllocationRange tRange = {
-        .ulOffset = 0,
-        .ulUsedSize = ulSize,
-        .ulTotalSize = tBlock.ulSize,
-        .ulBlockIndex = uIndex
+        .ulAddress    = 0,
+        .ulSize       = ulSize,
+        .ulMemoryType = (uint64_t)uMemoryType,
+        .tMemoryMode  = tMemoryMode // recorded so pl_free_memory can update the matching usage counter
     };
-    pl_sprintf(tRange.acName, "%s", pcName);
-
     const VkMemoryAllocateInfo tAllocInfo = {
         .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .allocationSize  = tBlock.ulSize,
+        .allocationSize  = ulSize,
         .memoryTypeIndex = uMemoryType
     };

-    PL_VULKAN(vkAllocateMemory(ptVulkanDevice->tLogicalDevice, &tAllocInfo, NULL, (VkDeviceMemory*)&tBlock.ulAddress));
-    ptData->ptDevice->ptGraphics->szHostMemoryInUse += tBlock.ulSize;
-
-    PL_VULKAN(vkMapMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)tBlock.ulAddress, 0, tBlock.ulSize, 0, (void**)&tBlock.pHostMapped));
-
-    tAllocation.pHostMapped = tBlock.pHostMapped;
-    tAllocation.uHandle = tBlock.ulAddress;
-    tAllocation.ulOffset = 0;
-
-    if(pcName)
-        pl_set_vulkan_object_name(ptData->ptDevice, tAllocation.uHandle, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, pcName);
-
-    ptData->sbtNodes[uIndex] = tRange;
-    ptData->sbtBlocks[uIndex] = tBlock;
-    return tAllocation;
-}
-
-static void
-pl_free_staging_uncached(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-
-    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
-    {
-        plDeviceAllocationRange* ptRange = &ptData->sbtNodes[i];
-        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptRange->ulBlockIndex];
-
-        // find block
-        if(ptBlock->ulAddress == ptAllocation->uHandle)
-        {
-            ptRange->ulUsedSize = 0;
-            memset(ptRange->acName, 0, PL_MAX_NAME_LENGTH);
-            strncpy(ptRange->acName, "not used", PL_MAX_NAME_LENGTH);
-            break;
-        }
-    }
-}
+    VkDeviceMemory tMemory = VK_NULL_HANDLE;
+    VkResult tResult = vkAllocateMemory(ptVulkanDevice->tLogicalDevice, &tAllocInfo, NULL, &tMemory);
+    PL_VULKAN(tResult);
+    tBlock.ulAddress = (uint64_t)tMemory;

-static plDeviceMemoryAllocation
-pl_allocate_staging_cached(struct plDeviceMemoryAllocatorO* ptInst, uint32_t uTypeFilter, uint64_t ulSize, uint64_t ulAlignment, const char* pcName)
-{
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
-    plVulkanDevice* ptVulkanDevice = ptData->ptDevice->_pInternalData;
+    pl_set_vulkan_object_name(ptDevice, tBlock.ulAddress, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, pcName);

-    uint32_t uMemoryType = 0u;
-    bool bFound = false;
-    const VkMemoryPropertyFlags tProperties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
-    for (uint32_t i = 0; i < ptVulkanDevice->tMemProps.memoryTypeCount; i++)
+    if(tMemoryMode == PL_MEMORY_GPU)
     {
-        if ((uTypeFilter & (1 << i)) && (ptVulkanDevice->tMemProps.memoryTypes[i].propertyFlags & tProperties) == tProperties)
-        {
-            uMemoryType = i;
-            bFound = true;
-            break;
-        }
+        ptDevice->ptGraphics->szLocalMemoryInUse += tBlock.ulSize;
     }
-    PL_ASSERT(bFound);
-
-    plDeviceMemoryAllocation tAllocation = {
-        .pHostMapped = NULL,
-        .uHandle     = 0,
-        .ulOffset    = 0,
-        .ulSize      = ulSize,
-        .ptInst      = ptInst
-    };
-
-    // check for existing block
-    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
+    else
     {
-        plDeviceAllocationRange* ptNode = &ptData->sbtNodes[i];
-        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptNode->ulBlockIndex];
-        if(ptNode->ulUsedSize == 0 && ptNode->ulTotalSize >= ulSize && ptBlock->ulMemoryType == (uint64_t)uMemoryType)
-        {
-            ptNode->ulUsedSize = ulSize;
-            pl_sprintf(ptNode->acName, "%s", pcName);
-            tAllocation.pHostMapped = ptBlock->pHostMapped;
-            tAllocation.uHandle = ptBlock->ulAddress;
-            tAllocation.ulOffset = 0;
-            tAllocation.ulSize = ptBlock->ulSize;
-            if(pcName)
-                pl_set_vulkan_object_name(ptData->ptDevice, tAllocation.uHandle, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, pcName);
-            return tAllocation;
-        }
+        PL_VULKAN(vkMapMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)tBlock.ulAddress, 0, tBlock.ulSize, 0, (void**)&tBlock.pHostMapped));
+        ptDevice->ptGraphics->szHostMemoryInUse += tBlock.ulSize;
     }

-    // block not found, create new block
-    plDeviceAllocationBlock tBlock = {
-        .ulAddress = 0,
-        .ulSize = pl_maxu((uint32_t)ulSize, PL_DEVICE_ALLOCATION_BLOCK_SIZE),
-        .ulMemoryType = uMemoryType
-    };
-
-    plDeviceAllocationRange tRange = {
-        .ulOffset = 0,
-        .ulUsedSize = ulSize,
-        .ulTotalSize = tBlock.ulSize,
-        .ulBlockIndex = pl_sb_size(ptData->sbtBlocks)
-    };
-    pl_sprintf(tRange.acName, "%s", pcName);
-
-    const VkMemoryAllocateInfo tAllocInfo = {
-        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .allocationSize  = tBlock.ulSize,
-        .memoryTypeIndex = uMemoryType
-    };
-
-    PL_VULKAN(vkAllocateMemory(ptVulkanDevice->tLogicalDevice, &tAllocInfo, NULL, (VkDeviceMemory*)&tBlock.ulAddress));
-    ptData->ptDevice->ptGraphics->szHostMemoryInUse += tBlock.ulSize;
-
-    PL_VULKAN(vkMapMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)tBlock.ulAddress, 0, tBlock.ulSize, 0, (void**)&tBlock.pHostMapped));
-
-    tAllocation.pHostMapped = tBlock.pHostMapped;
-    tAllocation.uHandle = tBlock.ulAddress;
-    tAllocation.ulOffset = 0;
-
-    if(pcName)
-        pl_set_vulkan_object_name(ptData->ptDevice, tAllocation.uHandle, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT, pcName);
-
-    pl_sb_push(ptData->sbtNodes, tRange);
-    pl_sb_push(ptData->sbtBlocks, tBlock);
-    return tAllocation;
+    return tBlock;
 }

 static void
-pl_free_staging_cached(struct plDeviceMemoryAllocatorO* ptInst, plDeviceMemoryAllocation* ptAllocation)
+pl_free_memory(plDevice* ptDevice, plDeviceAllocationBlock* ptBlock)
 {
-    plDeviceAllocatorData* ptData = (plDeviceAllocatorData*)ptInst;
+    plVulkanDevice* ptVulkanDevice = ptDevice->_pInternalData;

-    for(uint32_t i = 0; i < pl_sb_size(ptData->sbtNodes); i++)
+    if(ptBlock->tMemoryMode == PL_MEMORY_GPU)
     {
-        plDeviceAllocationRange* ptRange = &ptData->sbtNodes[i];
-        plDeviceAllocationBlock* ptBlock = &ptData->sbtBlocks[ptRange->ulBlockIndex];
-
-        // find block
-        if(ptBlock->ulAddress == ptAllocation->uHandle)
-        {
-            ptRange->ulUsedSize = 0;
-            memset(ptRange->acName, 0, PL_MAX_NAME_LENGTH);
-            strncpy(ptRange->acName, "not used", PL_MAX_NAME_LENGTH);
-            break;
-        }
+        ptDevice->ptGraphics->szLocalMemoryInUse -= ptBlock->ulSize;
+    }
+    else
+    {
+        ptDevice->ptGraphics->szHostMemoryInUse -= ptBlock->ulSize;
     }
+
+    vkFreeMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)ptBlock->ulAddress, NULL);
+    ptBlock->ulAddress = 0;
+    ptBlock->pHostMapped = NULL;
+    ptBlock->ulSize = 0;
+    ptBlock->tMemoryMode = 0;
+    ptBlock->ulMemoryType = 0;
+    ptBlock->dLastTimeUsed = 0;
 }

 static void
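pl_allocate_memory and pl_free_memory are now the single low-level path to raw VkDeviceMemory blocks; everything else layers on top. A short sketch of driving them directly, assuming a valid plDevice* ptDevice and a caller-provided pData, and using UINT32_MAX as an unrestricted type filter; most code should instead go through a plDeviceMemoryAllocatorI:

    // host-visible block: pl_allocate_memory maps it and adds it to szHostMemoryInUse
    plDeviceAllocationBlock tBlock = pl_allocate_memory(ptDevice, 1024, PL_MEMORY_GPU_CPU, UINT32_MAX, "scratch block");
    memcpy(tBlock.pHostMapped, pData, 1024); // persistently mapped, no vkMapMemory needed here

    pl_flush_device(ptDevice);         // wait for GPU idle before releasing memory still in flight
    pl_free_memory(ptDevice, &tBlock); // frees the VkDeviceMemory and zeroes the block

@@ -5679,38 +5407,10 @@ pl__garbage_collect(plGraphics* ptGraphics)

     for(uint32_t i = 0; i < pl_sb_size(ptGarbage->sbtMemory); i++)
     {
-        if(ptGarbage->sbtMemory[i].ptInst == ptGraphics->tDevice.tLocalBuddyAllocator.ptInst)
-            ptGraphics->tDevice.tLocalBuddyAllocator.free(ptGraphics->tDevice.tLocalBuddyAllocator.ptInst, &ptGarbage->sbtMemory[i]);
-        else if(ptGarbage->sbtMemory[i].ptInst == ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst)
-            ptGraphics->tDevice.tLocalDedicatedAllocator.free(ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst, &ptGarbage->sbtMemory[i]);
-        else if(ptGarbage->sbtMemory[i].ptInst == ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst)
-            ptGraphics->tDevice.tStagingUnCachedAllocator.free(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, &ptGarbage->sbtMemory[i]);
-        else if(ptGarbage->sbtMemory[i].ptInst == ptGraphics->tDevice.tStagingCachedAllocator.ptInst)
-            ptGraphics->tDevice.tStagingCachedAllocator.free(ptGraphics->tDevice.tStagingCachedAllocator.ptInst, &ptGarbage->sbtMemory[i]);
-    }
-
-    plDeviceAllocatorData* ptUnCachedAllocatorData = (plDeviceAllocatorData*)ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst;
-
-    plIO* ptIO = pl_get_io();
-    for(uint32_t i = 0; i < pl_sb_size(ptUnCachedAllocatorData->sbtNodes); i++)
-    {
-        plDeviceAllocationRange* ptNode = &ptUnCachedAllocatorData->sbtNodes[i];
-        plDeviceAllocationBlock* ptBlock = &ptUnCachedAllocatorData->sbtBlocks[ptNode->ulBlockIndex];
-
-        if(ptBlock->ulAddress == 0)
-        {
-            continue;
-        }
-        if(ptNode->ulUsedSize == 0 && ptIO->dTime - ptBlock->dLastTimeUsed > 1.0)
-        {
-            ptGraphics->szHostMemoryInUse -= ptBlock->ulSize;
-            vkUnmapMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)ptBlock->ulAddress);
-            vkFreeMemory(ptVulkanDevice->tLogicalDevice, (VkDeviceMemory)ptBlock->ulAddress, NULL);
-            ptBlock->ulAddress = 0;
-            pl_sb_push(ptUnCachedAllocatorData->sbtFreeBlockIndices, (uint32_t)ptNode->ulBlockIndex);
-        }
-        else if(ptNode->ulUsedSize != 0)
-            ptBlock->dLastTimeUsed = ptIO->dTime;
+        plDeviceMemoryAllocation tAllocation = ptGarbage->sbtMemory[i];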
+        plDeviceMemoryAllocatorI* ptAllocator = tAllocation.ptAllocator;
+        if(ptAllocator) // the swapchain has no allocator since its textures are provided externally
+            ptAllocator->free(ptAllocator->ptInst, &tAllocation);
     }

     pl_sb_reset(ptGarbage->sbtTextures);
@@ -5739,15 +5439,7 @@ pl_destroy_buffer(plDevice* ptDevice, plBufferHandle tHandle)
     pl_sb_push(ptGraphics->sbtBufferFreeIndices, tHandle.uIndex);

     plBuffer* ptBuffer = &ptGraphics->sbtBuffersCold[tHandle.uIndex];
-
-    if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalBuddyAllocator.ptInst)
-        ptGraphics->tDevice.tLocalBuddyAllocator.free(ptGraphics->tDevice.tLocalBuddyAllocator.ptInst, &ptBuffer->tMemoryAllocation);
-    else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst)
-        ptGraphics->tDevice.tLocalDedicatedAllocator.free(ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst, &ptBuffer->tMemoryAllocation);
-    else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst)
-        ptGraphics->tDevice.tStagingUnCachedAllocator.free(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, &ptBuffer->tMemoryAllocation);
-    else if(ptBuffer->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingCachedAllocator.ptInst)
-        ptGraphics->tDevice.tStagingCachedAllocator.free(ptGraphics->tDevice.tStagingCachedAllocator.ptInst, &ptBuffer->tMemoryAllocation);
+    ptBuffer->tMemoryAllocation.ptAllocator->free(ptBuffer->tMemoryAllocation.ptAllocator->ptInst, &ptBuffer->tMemoryAllocation);
 }

 static void
@@ -5764,15 +5456,7 @@ pl_destroy_texture(plDevice* ptDevice, plTextureHandle tHandle)
     ptGraphics->sbtTextureGenerations[tHandle.uIndex]++;

     plTexture* ptTexture = &ptGraphics->sbtTexturesCold[tHandle.uIndex];
-
-    if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalBuddyAllocator.ptInst)
-        ptGraphics->tDevice.tLocalBuddyAllocator.free(ptGraphics->tDevice.tLocalBuddyAllocator.ptInst, &ptTexture->tMemoryAllocation);
-    else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst)
-        ptGraphics->tDevice.tLocalDedicatedAllocator.free(ptGraphics->tDevice.tLocalDedicatedAllocator.ptInst, &ptTexture->tMemoryAllocation);
-    else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst)
-        ptGraphics->tDevice.tStagingUnCachedAllocator.free(ptGraphics->tDevice.tStagingUnCachedAllocator.ptInst, &ptTexture->tMemoryAllocation);
-    else if(ptTexture->tMemoryAllocation.ptInst == ptGraphics->tDevice.tStagingCachedAllocator.ptInst)
-        ptGraphics->tDevice.tStagingCachedAllocator.free(ptGraphics->tDevice.tStagingCachedAllocator.ptInst, &ptTexture->tMemoryAllocation);
+    ptTexture->tMemoryAllocation.ptAllocator->free(ptTexture->tMemoryAllocation.ptAllocator->ptInst, &ptTexture->tMemoryAllocation);
 }

 static void
@@ -5975,7 +5659,12 @@ pl_load_device_api(void)
         .get_buffer = pl__get_buffer,
         .get_texture = pl__get_texture,
         .get_bind_group = pl__get_bind_group,
-        .get_shader = pl__get_shader
+        .get_shader = pl__get_shader,
+        .allocate_memory = pl_allocate_memory,
+        .free_memory = pl_free_memory,
+        .flush_device = pl_flush_device,
+        .bind_buffer_to_memory = pl_bind_buffer_to_memory,
+        .bind_texture_to_memory = pl_bind_texture_to_memory
     };
     return &tApi;
 }
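With the plDeviceI additions above, the whole texture memory path is explicit: creation records requirements, memory can come from any source, and binding is a separate call. A hedged end-to-end sketch, assuming ptDevice and a filled plTextureDesc tDesc; real callers would normally obtain a plDeviceMemoryAllocatorI from the new gpu allocators extension instead of owning a raw block:

    plTextureHandle tHandle = pl_create_texture(ptDevice, &tDesc, "example texture");
    plTexture* ptTexture = &ptDevice->ptGraphics->sbtTexturesCold[tHandle.uIndex];

    // dedicated device-local block sized from the requirements recorded by pl_create_texture
    plDeviceAllocationBlock tBlock = pl_allocate_memory(ptDevice,
        ptTexture->tMemoryRequirements.ulSize,
        PL_MEMORY_GPU,
        ptTexture->tMemoryRequirements.uMemoryTypeBits,
        "example texture");

    plDeviceMemoryAllocation tAllocation = {
        .uHandle     = tBlock.ulAddress,
        .ulOffset    = 0,
        .ulSize      = tBlock.ulSize,
        .tMemoryMode = PL_MEMORY_GPU
        // .ptAllocator is NULL here, so pl_destroy_texture's allocator-based free cannot be used;
        // the owner must call pl_flush_device and then pl_free_memory on the block explicitly
    };
    pl_bind_texture_to_memory(ptDevice, tHandle, &tAllocation);

diff --git a/scripts/gen_build.py b/scripts/gen_build.py
index 3d9ab4c3..218bc70a 100644
--- a/scripts/gen_build.py
+++ b/scripts/gen_build.py
@@ -122,6 +122,7 @@ def add_plugin_to_metal_app(name, reloadable, objc = False, binary_name = None):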
     add_plugin_to_vulkan_app("pl_stats_ext", False)
     add_plugin_to_vulkan_app("pl_ecs_ext", False)
     add_plugin_to_vulkan_app("pl_resource_ext", False)
+    add_plugin_to_vulkan_app("pl_gpu_allocators_ext", False)
     add_plugin_to_vulkan_app("pl_ref_renderer_ext", True)

     add_plugin_to_metal_app("pl_debug_ext", False)
@@ -130,6 +131,7 @@ def add_plugin_to_metal_app(name, reloadable, objc = False, binary_name = None):
     add_plugin_to_metal_app("pl_ecs_ext", False)
     add_plugin_to_metal_app("pl_resource_ext", False)
     add_plugin_to_metal_app("pl_metal_ext", False, True, "pl_graphics_ext")
+    add_plugin_to_metal_app("pl_gpu_allocators_ext", False)
     add_plugin_to_metal_app("pl_ref_renderer_ext", True)

     pl.pop_target_links()
diff --git a/scripts/package.py b/scripts/package.py
index fa7458ca..5c35f7e3 100644
--- a/scripts/package.py
+++ b/scripts/package.py
@@ -20,6 +20,7 @@
     "pl_stats_ext",
     "pl_resource_ext",
     "pl_ref_renderer_ext",
+    "pl_gpu_allocators_ext",
]

if os.path.isdir(target_directory):