From 24ff85b11f3dc8eec0b69f18d3ca7323c6896b3e Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 18 Jan 2025 09:40:31 +0100 Subject: [PATCH 1/5] implement new index cache --- .../Renderer/Metal/MetalBufferAllocator.h | 83 ++++++++++++------- .../Renderer/Metal/MetalMemoryManager.cpp | 6 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 53 ++++++------ .../HW/Latte/Renderer/Metal/MetalRenderer.h | 5 +- .../Renderer/Metal/RendererShaderMtl.cpp | 6 +- 5 files changed, 88 insertions(+), 65 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index 209b1395..102ccdc9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -1,8 +1,8 @@ #pragma once #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Common/precompiled.h" -#include "Metal/MTLResource.hpp" +#include "util/helpers/MemoryPool.h" + #include struct MetalBufferRange @@ -54,7 +54,7 @@ public: return m_buffers[bufferIndex].m_buffer; } - MetalBufferAllocation GetBufferAllocation(size_t size) + MetalBufferAllocation GetAllocation(size_t size) { // Align the size size = Align(size, 128); @@ -121,29 +121,6 @@ public: return allocation; } - void FreeAllocation(MetalBufferAllocation& allocation) - { - MetalBufferRange range; - range.offset = allocation.offset; - range.size = allocation.size; - - allocation.offset = INVALID_OFFSET; - - // Find the correct position to insert the free range - auto& buffer = m_buffers[allocation.bufferIndex]; - for (uint32 i = 0; i < buffer.m_freeRanges.size(); i++) - { - auto& freeRange = buffer.m_freeRanges[i]; - if (freeRange.offset + freeRange.size == range.offset) - { - freeRange.size += range.size; - return; - } - } - - buffer.m_freeRanges.push_back(range); - } - protected: class MetalRenderer* m_mtlr; @@ -276,7 +253,7 @@ public: m_executingCommandBuffers.erase(it); } - MTL::Buffer* GetBuffer(uint32 bufferIndex) + void MarkBufferAsUsed(uint32 bufferIndex) { cemu_assert_debug(m_activeCommandBuffer); @@ -287,8 +264,13 @@ public: buffer.m_data.m_commandBufferCount++; buffer.m_data.m_lastCommandBuffer = m_activeCommandBuffer; } + } - return buffer.m_buffer; + MTL::Buffer* GetBuffer(uint32 bufferIndex) + { + MarkBufferAsUsed(bufferIndex); + + return m_buffers[bufferIndex].m_buffer; } MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex) @@ -296,6 +278,49 @@ public: return m_buffers[bufferIndex].m_buffer; } + MetalBufferAllocation* GetAllocationPtr(size_t size) + { + MetalBufferAllocation* allocation = m_poolAllocatorReservation.allocObj(); + *allocation = GetAllocation(size); + + LockBuffer(allocation->bufferIndex); + + return allocation; + } + + void FreeAllocation(MetalBufferAllocation& allocation) + { + // TODO + /* + MetalBufferRange range; + range.offset = allocation.offset; + range.size = allocation.size; + + allocation.offset = INVALID_OFFSET; + + // Find the correct position to insert the free range + auto& buffer = m_buffers[allocation.bufferIndex]; + for (uint32 i = 0; i < buffer.m_freeRanges.size(); i++) + { + auto& freeRange = buffer.m_freeRanges[i]; + if (freeRange.offset + freeRange.size == range.offset) + { + freeRange.size += range.size; + return; + } + } + + buffer.m_freeRanges.push_back(range); + */ + UnlockBuffer(allocation.bufferIndex); + } + + void FreeAllocation(MetalBufferAllocation* allocation) + { + FreeAllocation(*allocation); + m_poolAllocatorReservation.freeObj(allocation); + } + /* 
MetalBufferAllocation GetBufferAllocation(size_t size) { @@ -350,5 +375,7 @@ private: std::map> m_executingCommandBuffers; std::map>::iterator m_activeCommandBufferIt; + MemoryPool m_poolAllocatorReservation{32}; + uint16 m_framesSinceBackBufferAccess = 0; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index 8b9ac89f..25d82d5f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -73,7 +73,7 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si if (m_bufferCacheMode == BufferCacheMode::DevicePrivate) { - auto allocation = m_tempBufferAllocator.GetBufferAllocation(size); + auto allocation = m_tempBufferAllocator.GetAllocation(size); auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex); memcpy((uint8*)buffer->contents() + allocation.offset, data, size); @@ -82,8 +82,8 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); - // Make sure the buffer has the right command buffer - m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this + // Mark buffer as used + m_tempBufferAllocator.MarkBufferAsUsed(allocation.bufferIndex); // We can now safely unlock the buffer m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 83c39b5c..a5d50c46 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -683,7 +683,7 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s // Allocate a temporary buffer auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize); + auto allocation = bufferAllocator.GetAllocation(compressedImageSize); auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex); // Copy the data to the temporary buffer @@ -1067,9 +1067,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 hostIndexCount; uint32 indexMin = 0; uint32 indexMax = 0; - uint32 indexBufferOffset = 0; - uint32 indexBufferIndex = 0; - LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); + Renderer::IndexAllocation indexAllocation; + LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexAllocation); + MetalBufferAllocation* indexAllocationMtl = static_cast(indexAllocation.rendererInternal); // Buffer cache if (m_memoryManager->UseHostMemoryForCache()) @@ -1312,16 +1312,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 if (hostIndexType != INDEX_TYPE::NONE) { auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex); - - // We have already retrieved the buffer, no need for it to be locked anymore - bufferAllocator.UnlockBuffer(indexBufferIndex); + indexBuffer = 
bufferAllocator.GetBuffer(indexAllocationMtl->bufferIndex); } if (usesGeometryShader) { if (indexBuffer) - SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding); + SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexAllocationMtl->offset, vertexShader->resourceMapping.indexBufferBinding); uint8 hostIndexTypeU8 = (uint8)hostIndexType; renderCommandEncoder->setObjectBytes(&hostIndexTypeU8, sizeof(hostIndexTypeU8), vertexShader->resourceMapping.indexTypeBinding); @@ -1352,7 +1349,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 if (indexBuffer) { auto mtlIndexType = GetMtlIndexType(hostIndexType); - renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); + renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexAllocationMtl->offset, instanceCount, baseVertex, baseInstance); } else { @@ -1492,29 +1489,27 @@ void MetalRenderer::draw_handleSpecialState5() renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); } -void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) +Renderer::IndexAllocation MetalRenderer::indexData_reserveIndexMemory(uint32 size) { auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - auto allocation = bufferAllocator.GetBufferAllocation(size); - offset = allocation.offset; - bufferIndex = allocation.bufferIndex; + auto allocation = bufferAllocator.GetAllocationPtr(size); - // Lock the buffer so that it doesn't get released - bufferAllocator.LockBuffer(allocation.bufferIndex); - - return allocation.data; + return {allocation->data, allocation}; } -void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) +void MetalRenderer::indexData_releaseIndexMemory(IndexAllocation& allocation) { - // Do nothing - /* - if (!HasUnifiedMemory()) - { - auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex); - buffer->didModifyRange(NS::Range(offset, size)); - } - */ + auto allocationMtl = static_cast(allocation.rendererInternal); + + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); + bufferAllocator.FreeAllocation(allocationMtl); +} + +void MetalRenderer::indexData_uploadIndexMemory(IndexAllocation& allocation) +{ + // TODO: uncomment + //auto& bufferAllocator = m_memoryManager->GetBufferAllocator(); + //bufferAllocator.FlushAllocation(static_cast(allocation.rendererInternal)); } LatteQueryObject* MetalRenderer::occlusionQuery_create() { @@ -2102,9 +2097,9 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE } } - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); size_t size = shader->uniform.uniformRangeSize; - auto supportBuffer = bufferAllocator.GetBufferAllocation(size); + auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); + auto supportBuffer = bufferAllocator.GetAllocation(size); memcpy(supportBuffer.data, supportBufferData, size); auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex); //if (!HasUnifiedMemory()) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 760ad6bc..5a1dbbf5 100644 --- 
a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -271,8 +271,9 @@ public: void draw_handleSpecialState5(); // index - void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override; - void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override; + IndexAllocation indexData_reserveIndexMemory(uint32 size) override; + void indexData_releaseIndexMemory(IndexAllocation& allocation) override; + void indexData_uploadIndexMemory(IndexAllocation& allocation) override; // occlusion queries LatteQueryObject* occlusionQuery_create() override; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 9e6d3b9c..07073e08 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -15,8 +15,8 @@ #define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512) static bool s_isLoadingShadersMtl{false}; -static bool s_hasRAMFilesystem{false}; -class FileCache* s_airCache{nullptr}; +//static bool s_hasRAMFilesystem{false}; +//class FileCache* s_airCache{nullptr}; extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; @@ -190,6 +190,7 @@ void RendererShaderMtl::ShaderCacheLoading_end() void RendererShaderMtl::ShaderCacheLoading_Close() { // Close the AIR cache + /* if (s_airCache) { delete s_airCache; @@ -197,7 +198,6 @@ void RendererShaderMtl::ShaderCacheLoading_Close() } // Close RAM filesystem - /* if (s_hasRAMFilesystem) executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH); */ From 97b806f16f959779420231a76cb8088013c8ceb7 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 18 Jan 2025 14:42:57 +0100 Subject: [PATCH 2/5] rework buffer allocators --- src/Cafe/CMakeLists.txt | 1 + .../Renderer/Metal/MetalBufferAllocator.cpp | 233 +++++++++ .../Renderer/Metal/MetalBufferAllocator.h | 475 +++++------------- .../Renderer/Metal/MetalMemoryManager.cpp | 16 +- .../Latte/Renderer/Metal/MetalMemoryManager.h | 26 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 65 +-- .../HW/Latte/Renderer/Metal/MetalRenderer.h | 17 +- 7 files changed, 391 insertions(+), 442 deletions(-) create mode 100644 src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index ced42766..881a6d6d 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -555,6 +555,7 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/CachedFBOMtl.h HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h + HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp HW/Latte/Renderer/Metal/MetalBufferAllocator.h HW/Latte/Renderer/Metal/MetalMemoryManager.cpp HW/Latte/Renderer/Metal/MetalMemoryManager.h diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp new file mode 100644 index 00000000..62d0c093 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp @@ -0,0 +1,233 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h" + +MetalBufferChunkedHeap::~MetalBufferChunkedHeap() +{ + for (auto& chunk : m_chunkBuffers) + chunk->release(); +} + +uint32 MetalBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) +{ + size_t allocationSize = std::max(m_minimumBufferAllocationSize, minimumAllocationSize); + MTL::Buffer* buffer 
= m_mtlr->GetDevice()->newBuffer(allocationSize, MTL::ResourceStorageModeShared); + cemu_assert_debug(buffer); + cemu_assert_debug(m_chunkBuffers.size() == chunkIndex); + m_chunkBuffers.emplace_back(buffer); + + return allocationSize; +} + +void MetalSynchronizedRingAllocator::addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset) +{ + auto commandBuffer = m_mtlr->GetCurrentCommandBuffer(); + if (commandBuffer == buffer.lastSyncpointCommandBuffer) + return; + buffer.lastSyncpointCommandBuffer = commandBuffer; + buffer.queue_syncPoints.emplace(commandBuffer, offset); +} + +void MetalSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc) +{ + // calculate buffer size, should be a multiple of bufferAllocSize that is at least as large as sizeRequiredForAlloc + uint32 bufferAllocSize = m_minimumBufferAllocSize; + while (bufferAllocSize < sizeRequiredForAlloc) + bufferAllocSize += m_minimumBufferAllocSize; + + AllocatorBuffer_t newBuffer{}; + newBuffer.writeIndex = 0; + newBuffer.basePtr = nullptr; + newBuffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(bufferAllocSize, MTL::ResourceStorageModeShared); + newBuffer.basePtr = (uint8*)newBuffer.mtlBuffer->contents(); + newBuffer.size = bufferAllocSize; + newBuffer.index = (uint32)m_buffers.size(); + m_buffers.push_back(newBuffer); +} + +MetalSynchronizedRingAllocator::AllocatorReservation_t MetalSynchronizedRingAllocator::AllocateBufferMemory(uint32 size, uint32 alignment) +{ + if (alignment < 128) + alignment = 128; + size = (size + 127) & ~127; + + for (auto& itr : m_buffers) + { + // align pointer + uint32 alignmentPadding = (alignment - (itr.writeIndex % alignment)) % alignment; + uint32 distanceToSyncPoint; + if (!itr.queue_syncPoints.empty()) + { + if (itr.queue_syncPoints.front().offset < itr.writeIndex) + distanceToSyncPoint = 0xFFFFFFFF; + else + distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex; + } + else + distanceToSyncPoint = 0xFFFFFFFF; + uint32 spaceNeeded = alignmentPadding + size; + if (spaceNeeded > distanceToSyncPoint) + continue; // not enough space in current buffer + if ((itr.writeIndex + spaceNeeded) > itr.size) + { + // wrap-around + spaceNeeded = size; + alignmentPadding = 0; + // check if there is enough space in current buffer after wrap-around + if (!itr.queue_syncPoints.empty()) + { + distanceToSyncPoint = itr.queue_syncPoints.front().offset - 0; + if (spaceNeeded > distanceToSyncPoint) + continue; + } + else if (spaceNeeded > itr.size) + continue; + itr.writeIndex = 0; + } + addUploadBufferSyncPoint(itr, itr.writeIndex); + itr.writeIndex += alignmentPadding; + uint32 offset = itr.writeIndex; + itr.writeIndex += size; + itr.cleanupCounter = 0; + MetalSynchronizedRingAllocator::AllocatorReservation_t res; + res.mtlBuffer = itr.mtlBuffer; + res.memPtr = itr.basePtr + offset; + res.bufferOffset = offset; + res.size = size; + res.bufferIndex = itr.index; + + return res; + } + + // allocate new buffer + allocateAdditionalUploadBuffer(size); + + return AllocateBufferMemory(size, alignment); +} + +void MetalSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation) +{ + /* + cemu_assert_debug(m_bufferType == VKR_BUFFER_TYPE::STAGING); // only the staging buffer isn't coherent + // todo - use nonCoherentAtomSize for flush size (instead of hardcoded constant) + VkMappedMemoryRange flushedRange{}; + flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + flushedRange.memory = uploadReservation.vkMem; + flushedRange.offset = 
uploadReservation.bufferOffset; + flushedRange.size = uploadReservation.size; + vkFlushMappedMemoryRanges(m_vkr->GetLogicalDevice(), 1, &flushedRange); + */ +} + +void MetalSynchronizedRingAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer) +{ + for (auto& itr : m_buffers) + { + while (!itr.queue_syncPoints.empty() && latestFinishedCommandBuffer == itr.queue_syncPoints.front().commandBuffer) + { + itr.queue_syncPoints.pop(); + } + if (itr.queue_syncPoints.empty()) + itr.cleanupCounter++; + } + + // check if last buffer is available for deletion + if (m_buffers.size() >= 2) + { + auto& lastBuffer = m_buffers.back(); + if (lastBuffer.cleanupCounter >= 1000) + { + // release buffer + lastBuffer.mtlBuffer->release(); + m_buffers.pop_back(); + } + } +} + +MTL::Buffer* MetalSynchronizedRingAllocator::GetBufferByIndex(uint32 index) const +{ + return m_buffers[index].mtlBuffer; +} + +void MetalSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const +{ + numBuffers = (uint32)m_buffers.size(); + totalBufferSize = 0; + freeBufferSize = 0; + for (auto& itr : m_buffers) + { + totalBufferSize += itr.size; + // calculate free space in buffer + uint32 distanceToSyncPoint; + if (!itr.queue_syncPoints.empty()) + { + if (itr.queue_syncPoints.front().offset < itr.writeIndex) + distanceToSyncPoint = (itr.size - itr.writeIndex) + itr.queue_syncPoints.front().offset; // size with wrap-around + else + distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex; + } + else + distanceToSyncPoint = itr.size; + freeBufferSize += distanceToSyncPoint; + } +} + +/* MetalSynchronizedHeapAllocator */ + +MetalSynchronizedHeapAllocator::MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, size_t minimumBufferAllocSize) + : m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, minimumBufferAllocSize) {}; + +MetalSynchronizedHeapAllocator::AllocatorReservation* MetalSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment) +{ + CHAddr addr = m_chunkedHeap.alloc(size, alignment); + m_activeAllocations.emplace_back(addr); + AllocatorReservation* res = m_poolAllocatorReservation.allocObj(); + res->bufferIndex = addr.chunkIndex; + res->bufferOffset = addr.offset; + res->size = size; + res->mtlBuffer = m_chunkedHeap.GetBufferByIndex(addr.chunkIndex); + res->memPtr = m_chunkedHeap.GetChunkPtr(addr.chunkIndex) + addr.offset; + + return res; +} + +void MetalSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploadReservation) +{ + // put the allocation on a delayed release queue for the current command buffer + MTL::CommandBuffer* currentCommandBuffer = m_mtlr->GetCurrentCommandBuffer(); + auto it = std::find_if(m_activeAllocations.begin(), m_activeAllocations.end(), [&uploadReservation](const TrackedAllocation& allocation) { return allocation.allocation.chunkIndex == uploadReservation->bufferIndex && allocation.allocation.offset == uploadReservation->bufferOffset; }); + cemu_assert_debug(it != m_activeAllocations.end()); + m_releaseQueue[currentCommandBuffer].emplace_back(it->allocation); + m_activeAllocations.erase(it); + m_poolAllocatorReservation.freeObj(uploadReservation); +} + +void MetalSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation) +{ + /* + if (m_chunkedHeap.RequiresFlush(uploadReservation->bufferIndex)) + { + VkMappedMemoryRange flushedRange{}; + flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + flushedRange.memory = uploadReservation->vkMem; + 
flushedRange.offset = uploadReservation->bufferOffset; + flushedRange.size = uploadReservation->size; + vkFlushMappedMemoryRanges(VulkanRenderer::GetInstance()->GetLogicalDevice(), 1, &flushedRange); + } + */ +} + +void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer) +{ + auto it = m_releaseQueue.find(latestFinishedCommandBuffer); + if (it == m_releaseQueue.end()) + return; + + // release allocations + for(auto& addr : it->second) + m_chunkedHeap.free(addr); + it = m_releaseQueue.erase(it); +} + +void MetalSynchronizedHeapAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const +{ + m_chunkedHeap.GetStats(numBuffers, totalBufferSize, freeBufferSize); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index 102ccdc9..1db06527 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -1,381 +1,140 @@ #pragma once #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" +#include "util/ChunkedHeap/ChunkedHeap.h" #include "util/helpers/MemoryPool.h" #include -struct MetalBufferRange +class MetalBufferChunkedHeap : private ChunkedHeap<> { - size_t offset; - size_t size; + public: + MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { }; + ~MetalBufferChunkedHeap(); + + using ChunkedHeap::alloc; + using ChunkedHeap::free; + + uint8* GetChunkPtr(uint32 index) const + { + if (index >= m_chunkBuffers.size()) + return nullptr; + + return (uint8*)m_chunkBuffers[index]->contents(); + } + + MTL::Buffer* GetBufferByIndex(uint32 index) const + { + cemu_assert_debug(index < m_chunkBuffers.size()); + + return m_chunkBuffers[index]; + } + + void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const + { + numBuffers = m_chunkBuffers.size(); + totalBufferSize = m_numHeapBytes; + freeBufferSize = m_numHeapBytes - m_numAllocatedBytes; + } + + private: + uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override; + + const class MetalRenderer* m_mtlr; + + std::vector m_chunkBuffers; + size_t m_minimumBufferAllocationSize; }; -constexpr size_t BASE_ALLOCATION_SIZE = 8 * 1024 * 1024; // 8 MB -constexpr size_t MAX_ALLOCATION_SIZE = 64 * 1024 * 1024; // 64 MB - -void LatteIndices_invalidateAll(); - -template -class MetalBufferAllocator +// a circular ring-buffer which tracks and releases memory per command-buffer +class MetalSynchronizedRingAllocator { public: - struct Buffer - { - MTL::Buffer* m_buffer; - std::vector m_freeRanges; - BufferT m_data; - }; + MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_minimumBufferAllocSize(minimumBufferAllocSize) {}; + MetalSynchronizedRingAllocator(const MetalSynchronizedRingAllocator&) = delete; // disallow copy - MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer} { - m_isCPUAccessible = (storageMode == MTL::ResourceStorageModeShared) || (storageMode == MTL::ResourceStorageModeManaged); + struct BufferSyncPoint_t + { + // todo - modularize sync point + MTL::CommandBuffer* commandBuffer; + uint32 offset; - m_options = storageMode; - if (m_isCPUAccessible) - m_options |= MTL::ResourceCPUCacheModeWriteCombined; - } + 
BufferSyncPoint_t(MTL::CommandBuffer* _commandBuffer, uint32 _offset) : commandBuffer(_commandBuffer), offset(_offset) {}; + }; - ~MetalBufferAllocator() - { - for (auto buffer : m_buffers) - { - buffer.m_buffer->release(); - } - } + struct AllocatorBuffer_t + { + MTL::Buffer* mtlBuffer; + uint8* basePtr; + uint32 size; + uint32 writeIndex; + std::queue queue_syncPoints; + MTL::CommandBuffer* lastSyncpointCommandBuffer{ nullptr }; + uint32 index; + uint32 cleanupCounter{ 0 }; // increased by one every time CleanupBuffer() is called if there is no sync point. If it reaches 300 then the buffer is released + }; - void ResetAllocations() - { - for (uint32 i = 0; i < m_buffers.size(); i++) - FreeBuffer(i); - } + struct AllocatorReservation_t + { + MTL::Buffer* mtlBuffer; + uint8* memPtr; + uint32 bufferOffset; + uint32 size; + uint32 bufferIndex; + }; - MTL::Buffer* GetBuffer(uint32 bufferIndex) - { - return m_buffers[bufferIndex].m_buffer; - } + AllocatorReservation_t AllocateBufferMemory(uint32 size, uint32 alignment); + void FlushReservation(AllocatorReservation_t& uploadReservation); + void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer); + MTL::Buffer* GetBufferByIndex(uint32 index) const; - MetalBufferAllocation GetAllocation(size_t size) - { - // Align the size - size = Align(size, 128); - - // First, try to find a free range - for (uint32 i = 0; i < m_buffers.size(); i++) - { - auto& buffer = m_buffers[i]; - for (uint32 j = 0; j < buffer.m_freeRanges.size(); j++) - { - auto& range = buffer.m_freeRanges[j]; - if (size <= range.size) - { - MetalBufferAllocation allocation; - allocation.bufferIndex = i; - allocation.offset = range.offset; - allocation.size = size; - allocation.data = (m_isCPUAccessible ? (uint8*)buffer.m_buffer->contents() + range.offset : nullptr); - - range.offset += size; - range.size -= size; - - if (range.size == 0) - { - buffer.m_freeRanges.erase(buffer.m_freeRanges.begin() + j); - } - - return allocation; - } - } - } - - // If no free range was found, allocate a new buffer - size_t allocationSize = BASE_ALLOCATION_SIZE * (1u << m_buffers.size()); - allocationSize = std::min(allocationSize, MAX_ALLOCATION_SIZE); // Limit the allocation size - allocationSize = std::max(allocationSize, size); - MTL::Buffer* mtlBuffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options); - #ifdef CEMU_DEBUG_ASSERT - mtlBuffer->setLabel(GetLabel("Buffer from buffer allocator", mtlBuffer)); - #endif - - MetalBufferAllocation allocation; - allocation.bufferIndex = m_buffers.size(); - allocation.offset = 0; - allocation.size = size; - allocation.data = (m_isCPUAccessible ? 
mtlBuffer->contents() : nullptr); - - m_buffers.push_back({mtlBuffer}); - auto& buffer = m_buffers.back(); - - // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges - if (size < allocationSize) - { - MetalBufferRange range; - range.offset = size; - range.size = allocationSize - size; - - buffer.m_freeRanges.push_back(range); - } - - // Debug - m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory += allocationSize; - - return allocation; - } - -protected: - class MetalRenderer* m_mtlr; - - // TODO: make these template arguments - bool m_isCPUAccessible; - MTL::ResourceOptions m_options; - - std::vector m_buffers; - - void FreeBuffer(uint32 bufferIndex) - { - auto& buffer = m_buffers[bufferIndex]; - buffer.m_freeRanges.clear(); - buffer.m_freeRanges.push_back({0, buffer.m_buffer->length()}); - } -}; - -struct Empty {}; -typedef MetalBufferAllocator MetalDefaultBufferAllocator; - -struct MetalSyncedBuffer -{ - uint32 m_commandBufferCount = 0; - MTL::CommandBuffer* m_lastCommandBuffer = nullptr; - uint32 m_lock = 0; - - bool IsLocked() const - { - return (m_lock != 0); - } -}; - -constexpr uint16 BUFFER_RELEASE_FRAME_TRESHOLD = 1024; - -class MetalTemporaryBufferAllocator : public MetalBufferAllocator -{ -public: - MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared) {} - - void LockBuffer(uint32 bufferIndex) - { - m_buffers[bufferIndex].m_data.m_lock++; - } - - void UnlockBuffer(uint32 bufferIndex) - { - auto& buffer = m_buffers[bufferIndex]; - - buffer.m_data.m_lock--; - - // Release the buffer if it wasn't released due to the lock - if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBufferCount == 0) - FreeBuffer(bufferIndex); - } - - void EndFrame() - { - // Unlock all buffers - for (uint32_t i = 0; i < m_buffers.size(); i++) - { - auto& buffer = m_buffers[i]; - - if (buffer.m_data.IsLocked()) - { - if (buffer.m_data.m_commandBufferCount == 0) - FreeBuffer(i); - - buffer.m_data.m_lock = 0; - } - } - - // TODO: do this for other buffer allocators as well? - // Track how many frames have passed since the last access to the back buffer - if (!m_buffers.empty()) - { - auto& backBuffer = m_buffers.back(); - if (backBuffer.m_data.m_commandBufferCount == 0) - { - // Release the back buffer if it hasn't been accessed for a while - if (m_framesSinceBackBufferAccess >= BUFFER_RELEASE_FRAME_TRESHOLD) - { - // Debug - m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory -= backBuffer.m_buffer->length(); - - backBuffer.m_buffer->release(); - m_buffers.pop_back(); - - m_framesSinceBackBufferAccess = 0; - } - else - { - m_framesSinceBackBufferAccess++; - } - } - else - { - m_framesSinceBackBufferAccess = 0; - } - } - } - - void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) - { - m_activeCommandBuffer = commandBuffer; - if (commandBuffer) - { - auto result = m_executingCommandBuffers.emplace(std::make_pair(m_activeCommandBuffer, std::vector{})); - cemu_assert_debug(result.second); - m_activeCommandBufferIt = result.first; - } - else - { - m_activeCommandBufferIt = m_executingCommandBuffers.end(); - } - } - - void CommandBufferFinished(MTL::CommandBuffer* commandBuffer) - { - auto it = m_executingCommandBuffers.find(commandBuffer); - for (auto bufferIndex : it->second) - { - auto& buffer = m_buffers[bufferIndex]; - buffer.m_data.m_commandBufferCount--; - - // TODO: is this neccessary? 
- if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBufferCount == 0) - FreeBuffer(bufferIndex); - } - - m_executingCommandBuffers.erase(it); - } - - void MarkBufferAsUsed(uint32 bufferIndex) - { - cemu_assert_debug(m_activeCommandBuffer); - - auto& buffer = m_buffers[bufferIndex]; - if (buffer.m_data.m_commandBufferCount == 0 || buffer.m_data.m_lastCommandBuffer != m_activeCommandBuffer) - { - m_activeCommandBufferIt->second.push_back(bufferIndex); - buffer.m_data.m_commandBufferCount++; - buffer.m_data.m_lastCommandBuffer = m_activeCommandBuffer; - } - } - - MTL::Buffer* GetBuffer(uint32 bufferIndex) - { - MarkBufferAsUsed(bufferIndex); - - return m_buffers[bufferIndex].m_buffer; - } - - MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex) - { - return m_buffers[bufferIndex].m_buffer; - } - - MetalBufferAllocation* GetAllocationPtr(size_t size) - { - MetalBufferAllocation* allocation = m_poolAllocatorReservation.allocObj(); - *allocation = GetAllocation(size); - - LockBuffer(allocation->bufferIndex); - - return allocation; - } - - void FreeAllocation(MetalBufferAllocation& allocation) - { - // TODO - /* - MetalBufferRange range; - range.offset = allocation.offset; - range.size = allocation.size; - - allocation.offset = INVALID_OFFSET; - - // Find the correct position to insert the free range - auto& buffer = m_buffers[allocation.bufferIndex]; - for (uint32 i = 0; i < buffer.m_freeRanges.size(); i++) - { - auto& freeRange = buffer.m_freeRanges[i]; - if (freeRange.offset + freeRange.size == range.offset) - { - freeRange.size += range.size; - return; - } - } - - buffer.m_freeRanges.push_back(range); - */ - UnlockBuffer(allocation.bufferIndex); - } - - void FreeAllocation(MetalBufferAllocation* allocation) - { - FreeAllocation(*allocation); - m_poolAllocatorReservation.freeObj(allocation); - } - - /* - MetalBufferAllocation GetBufferAllocation(size_t size) - { - if (!m_activeCommandBuffer) - throw std::runtime_error("No active command buffer when allocating a buffer!"); - - auto allocation = MetalBufferAllocator::GetBufferAllocation(size); - - auto& buffer = m_buffers[allocation.bufferIndex]; - if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer) - buffer.m_commandBuffers.push_back(m_activeCommandBuffer); - - return allocation; - } - */ - - // For debugging - /* - void LogInfo() - { - debug_printf("BUFFERS:\n"); - for (auto& buffer : m_buffers) - { - debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_data.m_commandBuffers.size()); - uint32 same = 0; - uint32 completed = 0; - for (uint32 i = 0; i < buffer.m_data.m_commandBuffers.size(); i++) - { - if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[i])) - completed++; - for (uint32 j = 0; j < buffer.m_data.m_commandBuffers.size(); j++) - { - if (i != j && buffer.m_data.m_commandBuffers[i] == buffer.m_data.m_commandBuffers[j]) - same++; - } - } - debug_printf(" same: %u\n", same); - debug_printf(" completed: %u\n", completed); - - debug_printf(" FREE RANGES:\n"); - for (auto& range : buffer.m_freeRanges) - { - debug_printf(" offset: %zu, size: %zu\n", range.offset, range.size); - } - } - } - */ + void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const; private: - MTL::CommandBuffer* m_activeCommandBuffer = nullptr; + void allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc); + void addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset); - std::map> 
m_executingCommandBuffers; - std::map>::iterator m_activeCommandBufferIt; + const class MetalRenderer* m_mtlr; + const uint32 m_minimumBufferAllocSize; - MemoryPool m_poolAllocatorReservation{32}; - - uint16 m_framesSinceBackBufferAccess = 0; + std::vector m_buffers; +}; + +// heap style allocator with released memory being freed after the current command buffer finishes +class MetalSynchronizedHeapAllocator +{ + struct TrackedAllocation + { + TrackedAllocation(CHAddr allocation) : allocation(allocation) {}; + CHAddr allocation; + }; + + public: + MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, size_t minimumBufferAllocSize); + MetalSynchronizedHeapAllocator(const MetalSynchronizedHeapAllocator&) = delete; // disallow copy + + struct AllocatorReservation + { + MTL::Buffer* mtlBuffer; + uint8* memPtr; + uint32 bufferOffset; + uint32 size; + uint32 bufferIndex; + }; + + AllocatorReservation* AllocateBufferMemory(uint32 size, uint32 alignment); + void FreeReservation(AllocatorReservation* uploadReservation); + void FlushReservation(AllocatorReservation* uploadReservation); + + void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer); + + void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const; + private: + const class MetalRenderer* m_mtlr; + MetalBufferChunkedHeap m_chunkedHeap; + // allocations + std::vector m_activeAllocations; + MemoryPool m_poolAllocatorReservation{32}; + // release queue + std::unordered_map> m_releaseQueue; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index 25d82d5f..45a06139 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -73,20 +73,14 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si if (m_bufferCacheMode == BufferCacheMode::DevicePrivate) { - auto allocation = m_tempBufferAllocator.GetAllocation(size); - auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex); - memcpy((uint8*)buffer->contents() + allocation.offset, data, size); + auto blitCommandEncoder = m_mtlr->GetBlitCommandEncoder(); - // Lock the buffer to make sure it's not deallocated before the copy is done - m_tempBufferAllocator.LockBuffer(allocation.bufferIndex); + auto allocation = m_stagingAllocator.AllocateBufferMemory(size, 1); + memcpy(allocation.memPtr, data, size); - m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); + blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size); - // Mark buffer as used - m_tempBufferAllocator.MarkBufferAsUsed(allocation.bufferIndex); - - // We can now safely unlock the buffer - m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex); + //m_mtlr->CopyBufferToBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); } else { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index 3d70e0db..4f040337 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -7,22 +7,17 @@ class MetalMemoryManager { public: - MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, 
m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer) {} + MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr/*, m_mtlr->GetOptimalBufferStorageMode()*/, 32u * 1024 * 1024), m_indexAllocator(m_mtlr/*, m_mtlr->GetOptimalBufferStorageMode()*/, 4u * 1024 * 1024) {} ~MetalMemoryManager(); - MetalDefaultBufferAllocator& GetBufferAllocator() + MetalSynchronizedRingAllocator& GetStagingAllocator() { - return m_bufferAllocator; + return m_stagingAllocator; } - MetalDefaultBufferAllocator& GetFramePersistentBufferAllocator() + MetalSynchronizedHeapAllocator& GetIndexAllocator() { - return m_framePersistentBufferAllocator; - } - - MetalTemporaryBufferAllocator& GetTemporaryBufferAllocator() - { - return m_tempBufferAllocator; + return m_indexAllocator; } MTL::Buffer* GetBufferCache() @@ -30,6 +25,12 @@ public: return m_bufferCache; } + void CleanupBuffers(MTL::CommandBuffer* latestFinishedCommandBuffer) + { + m_stagingAllocator.CleanupBuffer(latestFinishedCommandBuffer); + m_indexAllocator.CleanupBuffer(latestFinishedCommandBuffer); + } + // Texture upload buffer void* AcquireTextureUploadBuffer(size_t size); void ReleaseTextureUploadBuffer(uint8* mem); @@ -65,9 +66,8 @@ private: std::vector m_textureUploadBuffer; - MetalDefaultBufferAllocator m_bufferAllocator; - MetalDefaultBufferAllocator m_framePersistentBufferAllocator; - MetalTemporaryBufferAllocator m_tempBufferAllocator; + MetalSynchronizedRingAllocator m_stagingAllocator; + MetalSynchronizedHeapAllocator m_indexAllocator; MTL::Buffer* m_bufferCache = nullptr; BufferCacheMode m_bufferCacheMode; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index a5d50c46..17a0d86d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -21,8 +21,8 @@ #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Common/precompiled.h" +#include "HW/Latte/Renderer/Metal/MetalBufferAllocator.h" #include "HW/Latte/Renderer/Metal/MetalCommon.h" -#include "Metal/MTLCaptureManager.hpp" #include "config/CemuConfig.h" #include "gui/guiWrapper.h" @@ -191,6 +191,7 @@ MetalRenderer::MetalRenderer() utilityLibrary->release(); // HACK: for some reason, this variable ends up being initialized to some garbage data, even though its declared as bool m_captureFrame = false; + m_occlusionQuery.m_lastCommandBuffer = nullptr; m_captureFrame = false; } @@ -302,12 +303,6 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) // Reset the command buffers (they are released by TemporaryBufferAllocator) CommitCommandBuffer(); - // Release frame persistent buffers - m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations(); - - // Unlock all temporary buffers - m_memoryManager->GetTemporaryBufferAllocator().EndFrame(); - // Debug m_performanceMonitor.ResetPerFrameData(); @@ -682,17 +677,16 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s auto blitCommandEncoder = GetBlitCommandEncoder(); // Allocate a temporary buffer - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - auto allocation = bufferAllocator.GetAllocation(compressedImageSize); - auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex); + auto& bufferAllocator = m_memoryManager->GetStagingAllocator(); + auto 
allocation = bufferAllocator.AllocateBufferMemory(compressedImageSize, 1); // Copy the data to the temporary buffer - memcpy(allocation.data, pixelData, compressedImageSize); + memcpy(allocation.memPtr, pixelData, compressedImageSize); //buffer->didModifyRange(NS::Range(allocation.offset, allocation.size)); // TODO: specify blit options when copying to a depth stencil texture? // Copy the data from the temporary buffer to the texture - blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ)); + blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ)); //} } @@ -1069,7 +1063,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 indexMax = 0; Renderer::IndexAllocation indexAllocation; LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexAllocation); - MetalBufferAllocation* indexAllocationMtl = static_cast(indexAllocation.rendererInternal); + auto indexAllocationMtl = static_cast(indexAllocation.rendererInternal); // Buffer cache if (m_memoryManager->UseHostMemoryForCache()) @@ -1308,17 +1302,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 BindStageResources(renderCommandEncoder, pixelShader, usesGeometryShader); // Draw - MTL::Buffer* indexBuffer = nullptr; - if (hostIndexType != INDEX_TYPE::NONE) - { - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - indexBuffer = bufferAllocator.GetBuffer(indexAllocationMtl->bufferIndex); - } - if (usesGeometryShader) { - if (indexBuffer) - SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexAllocationMtl->offset, vertexShader->resourceMapping.indexBufferBinding); + if (hostIndexType != INDEX_TYPE::NONE) + SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexAllocationMtl->mtlBuffer, indexAllocationMtl->bufferOffset, vertexShader->resourceMapping.indexBufferBinding); uint8 hostIndexTypeU8 = (uint8)hostIndexType; renderCommandEncoder->setObjectBytes(&hostIndexTypeU8, sizeof(hostIndexTypeU8), vertexShader->resourceMapping.indexTypeBinding); @@ -1346,10 +1333,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } else { - if (indexBuffer) + if (hostIndexType != INDEX_TYPE::NONE) { auto mtlIndexType = GetMtlIndexType(hostIndexType); - renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexAllocationMtl->offset, instanceCount, baseVertex, baseInstance); + renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexAllocationMtl->mtlBuffer, indexAllocationMtl->bufferOffset, instanceCount, baseVertex, baseInstance); } else { @@ -1491,25 +1478,19 @@ void MetalRenderer::draw_handleSpecialState5() Renderer::IndexAllocation MetalRenderer::indexData_reserveIndexMemory(uint32 size) { - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - auto allocation = bufferAllocator.GetAllocationPtr(size); + auto allocation = m_memoryManager->GetIndexAllocator().AllocateBufferMemory(size, 128); - return {allocation->data, allocation}; + return {allocation->memPtr, allocation}; } void 
MetalRenderer::indexData_releaseIndexMemory(IndexAllocation& allocation) { - auto allocationMtl = static_cast(allocation.rendererInternal); - - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - bufferAllocator.FreeAllocation(allocationMtl); + m_memoryManager->GetIndexAllocator().FreeReservation(static_cast(allocation.rendererInternal)); } void MetalRenderer::indexData_uploadIndexMemory(IndexAllocation& allocation) { - // TODO: uncomment - //auto& bufferAllocator = m_memoryManager->GetBufferAllocator(); - //bufferAllocator.FlushAllocation(static_cast(allocation.rendererInternal)); + m_memoryManager->GetIndexAllocator().FlushReservation(static_cast(allocation.rendererInternal)); } LatteQueryObject* MetalRenderer::occlusionQuery_create() { @@ -1647,9 +1628,6 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() m_recordedDrawcalls = 0; m_commitTreshold = m_defaultCommitTreshlod; - // Notify memory manager about the new command buffer - m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer); - // Debug m_performanceMonitor.m_commandBuffers++; @@ -1830,8 +1808,6 @@ void MetalRenderer::CommitCommandBuffer() m_executingCommandBuffers.push_back(mtlCommandBuffer); - m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr); - // Debug //m_commandQueue->insertDebugCaptureBoundary(); } @@ -1846,7 +1822,7 @@ void MetalRenderer::ProcessFinishedCommandBuffers() auto commandBuffer = *it; if (CommandBufferCompleted(commandBuffer)) { - m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer); + m_memoryManager->CleanupBuffers(commandBuffer); commandBuffer->release(); it = m_executingCommandBuffers.erase(it); atLeastOneCompleted = true; @@ -2098,14 +2074,13 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE } size_t size = shader->uniform.uniformRangeSize; - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - auto supportBuffer = bufferAllocator.GetAllocation(size); - memcpy(supportBuffer.data, supportBufferData, size); - auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex); + auto& bufferAllocator = m_memoryManager->GetStagingAllocator(); + auto supportBuffer = bufferAllocator.AllocateBufferMemory(size, 1); + memcpy(supportBuffer.memPtr, supportBufferData, size); //if (!HasUnifiedMemory()) // buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); - SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint); + SetBuffer(renderCommandEncoder, mtlShaderType, supportBuffer.mtlBuffer, supportBuffer.bufferOffset, shader->resourceMapping.uniformVarsBufferBindingPoint); } // Uniform buffers diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 5a1dbbf5..04c63be8 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -7,19 +7,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" -struct MetalBufferAllocation -{ - void* data; - uint32 bufferIndex; - size_t offset = INVALID_OFFSET; - size_t size; - - bool IsValid() const - { - return offset != INVALID_OFFSET; - } -}; - enum MetalGeneralShaderType { METAL_GENERAL_SHADER_TYPE_VERTEX, @@ -295,14 +282,14 @@ public: return (m_currentCommandBuffer.m_commandBuffer && !m_currentCommandBuffer.m_commited); } - 
MTL::CommandBuffer* GetCurrentCommandBuffer() + MTL::CommandBuffer* GetCurrentCommandBuffer() const { cemu_assert_debug(m_currentCommandBuffer.m_commandBuffer); return m_currentCommandBuffer.m_commandBuffer; } - MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted() + MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted() const { // The command buffer has been commited and has finished execution if (m_currentCommandBuffer.m_commited && m_executingCommandBuffers.size() == 0) From d086eb3db506b9d595f39400b0f7c83545a41bfa Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 18 Jan 2025 17:22:28 +0100 Subject: [PATCH 3/5] fix: index buffer crashes --- src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp | 4 ++-- src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp | 6 ------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp index 62d0c093..44d11a3b 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp @@ -222,9 +222,9 @@ void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFin return; // release allocations - for(auto& addr : it->second) + for (auto& addr : it->second) m_chunkedHeap.free(addr); - it = m_releaseQueue.erase(it); + m_releaseQueue.erase(it); } void MetalSynchronizedHeapAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 17a0d86d..19afbf06 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -1816,7 +1816,6 @@ void MetalRenderer::CommitCommandBuffer() void MetalRenderer::ProcessFinishedCommandBuffers() { // Check for finished command buffers - bool atLeastOneCompleted = false; for (auto it = m_executingCommandBuffers.begin(); it != m_executingCommandBuffers.end();) { auto commandBuffer = *it; @@ -1825,17 +1824,12 @@ void MetalRenderer::ProcessFinishedCommandBuffers() m_memoryManager->CleanupBuffers(commandBuffer); commandBuffer->release(); it = m_executingCommandBuffers.erase(it); - atLeastOneCompleted = true; } else { ++it; } } - - // Invalidate indices if at least one command buffer has completed - if (atLeastOneCompleted) - LatteIndices_invalidateAll(); } bool MetalRenderer::AcquireDrawable(bool mainWindow) From 6d6c04ae3c5eeccf29ad2e1eb4d960e40588ba1b Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 18 Jan 2025 17:27:51 +0100 Subject: [PATCH 4/5] update debug overlay --- .../Renderer/Metal/MetalPerformanceMonitor.h | 2 -- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 29 +++++++++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h index cb65162e..bdbaa84b 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h @@ -3,8 +3,6 @@ class MetalPerformanceMonitor { public: - size_t m_bufferAllocatorMemory = 0; - // Per frame data uint32 m_commandBuffers = 0; uint32 m_renderPasses = 0; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 19afbf06..45bc967c 100644 --- 
a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -16,11 +16,10 @@ #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Core/LatteIndices.h" -#include "Cemu/Logging/CemuDebugLogging.h" +#include "Cafe/HW/Latte/Core/LatteBufferCache.h" #include "Cemu/Logging/CemuLogging.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/LatteConst.h" -#include "Common/precompiled.h" #include "HW/Latte/Renderer/Metal/MetalBufferAllocator.h" #include "HW/Latte/Renderer/Metal/MetalCommon.h" #include "config/CemuConfig.h" @@ -588,7 +587,6 @@ void MetalRenderer::AppendOverlayDebugInfo() ImGui::Text("--- Metal info ---"); ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize()); - ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024); ImGui::Text("--- Metal info (per frame) ---"); ImGui::Text("Command buffers %u", m_performanceMonitor.m_commandBuffers); @@ -596,6 +594,31 @@ void MetalRenderer::AppendOverlayDebugInfo() ImGui::Text("Clears %u", m_performanceMonitor.m_clears); ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws); ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans); + + ImGui::Text("--- Cache debug info ---"); + + uint32 bufferCacheHeapSize = 0; + uint32 bufferCacheAllocationSize = 0; + uint32 bufferCacheNumAllocations = 0; + + LatteBufferCache_getStats(bufferCacheHeapSize, bufferCacheAllocationSize, bufferCacheNumAllocations); + + ImGui::Text("Buffer"); + ImGui::SameLine(60.0f); + ImGui::Text("%06uKB / %06uKB Allocs: %u", (uint32)(bufferCacheAllocationSize + 1023) / 1024, ((uint32)bufferCacheHeapSize + 1023) / 1024, (uint32)bufferCacheNumAllocations); + + uint32 numBuffers; + size_t totalSize, freeSize; + + m_memoryManager->GetStagingAllocator().GetStats(numBuffers, totalSize, freeSize); + ImGui::Text("Staging"); + ImGui::SameLine(60.0f); + ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers); + + m_memoryManager->GetIndexAllocator().GetStats(numBuffers, totalSize, freeSize); + ImGui::Text("Index"); + ImGui::SameLine(60.0f); + ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers); } void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ) From bf93f907398e38fd4f5fe82156fc266dad787667 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Sat, 18 Jan 2025 18:01:40 +0100 Subject: [PATCH 5/5] flush uploaded buffers --- .../Renderer/Metal/MetalBufferAllocator.cpp | 32 +++++-------------- .../Renderer/Metal/MetalBufferAllocator.h | 31 +++++++++++++++--- .../Renderer/Metal/MetalMemoryManager.cpp | 1 + .../Latte/Renderer/Metal/MetalMemoryManager.h | 2 +- .../HW/Latte/Renderer/Metal/MetalRenderer.cpp | 10 +++--- 5 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp index 44d11a3b..05d169b3 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp @@ -9,7 +9,7 @@ MetalBufferChunkedHeap::~MetalBufferChunkedHeap() uint32 MetalBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 
minimumAllocationSize)
 {
 	size_t allocationSize = std::max(m_minimumBufferAllocationSize, minimumAllocationSize);
-	MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(allocationSize, MTL::ResourceStorageModeShared);
+	MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options);
 	cemu_assert_debug(buffer);
 
 	cemu_assert_debug(m_chunkBuffers.size() == chunkIndex);
 	m_chunkBuffers.emplace_back(buffer);
@@ -36,7 +36,7 @@ void MetalSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeR
 	AllocatorBuffer_t newBuffer{};
 	newBuffer.writeIndex = 0;
 	newBuffer.basePtr = nullptr;
-	newBuffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(bufferAllocSize, MTL::ResourceStorageModeShared);
+	newBuffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(bufferAllocSize, m_options);
 	newBuffer.basePtr = (uint8*)newBuffer.mtlBuffer->contents();
 	newBuffer.size = bufferAllocSize;
 	newBuffer.index = (uint32)m_buffers.size();
@@ -105,16 +105,10 @@ MetalSynchronizedRingAllocator::AllocatorReservation_t MetalSynchronizedRingAllo
 
 void MetalSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation)
 {
-	/*
-	cemu_assert_debug(m_bufferType == VKR_BUFFER_TYPE::STAGING); // only the staging buffer isn't coherent
-	// todo - use nonCoherentAtomSize for flush size (instead of hardcoded constant)
-	VkMappedMemoryRange flushedRange{};
-	flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
-	flushedRange.memory = uploadReservation.vkMem;
-	flushedRange.offset = uploadReservation.bufferOffset;
-	flushedRange.size = uploadReservation.size;
-	vkFlushMappedMemoryRanges(m_vkr->GetLogicalDevice(), 1, &flushedRange);
-	*/
+	if (RequiresFlush())
+	{
+		uploadReservation.mtlBuffer->didModifyRange(NS::Range(uploadReservation.bufferOffset, uploadReservation.size));
+	}
 }
 
 void MetalSynchronizedRingAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
@@ -172,9 +166,6 @@ void MetalSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalB
 
 /* MetalSynchronizedHeapAllocator */
 
-MetalSynchronizedHeapAllocator::MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, size_t minimumBufferAllocSize)
-	: m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, minimumBufferAllocSize) {};
-
 MetalSynchronizedHeapAllocator::AllocatorReservation* MetalSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
 {
 	CHAddr addr = m_chunkedHeap.alloc(size, alignment);
@@ -202,17 +193,10 @@ void MetalSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploa
 
 void MetalSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation)
 {
-	/*
-	if (m_chunkedHeap.RequiresFlush(uploadReservation->bufferIndex))
+	if (m_chunkedHeap.RequiresFlush())
 	{
-		VkMappedMemoryRange flushedRange{};
-		flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
-		flushedRange.memory = uploadReservation->vkMem;
-		flushedRange.offset = uploadReservation->bufferOffset;
-		flushedRange.size = uploadReservation->size;
-		vkFlushMappedMemoryRanges(VulkanRenderer::GetInstance()->GetLogicalDevice(), 1, &flushedRange);
+		uploadReservation->mtlBuffer->didModifyRange(NS::Range(uploadReservation->bufferOffset, uploadReservation->size));
 	}
-	*/
 }
 
 void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
index 1db06527..2a62de19 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h
@@ -1,15 +1,24 @@
 #pragma once
 
 #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
+#include "Metal/MTLResource.hpp"
 #include "util/ChunkedHeap/ChunkedHeap.h"
 #include "util/helpers/MemoryPool.h"
 
 #include 
 
+inline MTL::ResourceOptions GetResourceOptions(MTL::ResourceOptions options)
+{
+	if (options & MTL::ResourceStorageModeShared || options & MTL::ResourceStorageModeManaged)
+		options |= MTL::ResourceCPUCacheModeWriteCombined;
+
+	return options;
+}
+
 class MetalBufferChunkedHeap : private ChunkedHeap<>
 {
 public:
-	MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { };
+	MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { };
 	~MetalBufferChunkedHeap();
 
 	using ChunkedHeap::alloc;
@@ -30,6 +39,11 @@ class MetalBufferChunkedHeap : private ChunkedHeap<>
 		return m_chunkBuffers[index];
 	}
 
+	bool RequiresFlush() const
+	{
+		return m_options & MTL::ResourceStorageModeManaged;
+	}
+
 	void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
 	{
 		numBuffers = m_chunkBuffers.size();
@@ -42,15 +56,17 @@ class MetalBufferChunkedHeap : private ChunkedHeap<>
 
 	const class MetalRenderer* m_mtlr;
 
-	std::vector<MTL::Buffer*> m_chunkBuffers;
+	MTL::ResourceOptions m_options;
 
 	size_t m_minimumBufferAllocationSize;
+
+	std::vector<MTL::Buffer*> m_chunkBuffers;
 };
 
 // a circular ring-buffer which tracks and releases memory per command-buffer
 class MetalSynchronizedRingAllocator
 {
 public:
-	MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
+	MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
 	MetalSynchronizedRingAllocator(const MetalSynchronizedRingAllocator&) = delete; // disallow copy
 
 	struct BufferSyncPoint_t
@@ -88,6 +104,11 @@ public:
 	void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
 	MTL::Buffer* GetBufferByIndex(uint32 index) const;
 
+	bool RequiresFlush() const
+	{
+		return m_options & MTL::ResourceStorageModeManaged;
+	}
+
 	void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
 
 private:
@@ -95,6 +116,8 @@
 	void addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset);
 
 	const class MetalRenderer* m_mtlr;
+
+	MTL::ResourceOptions m_options;
 	const uint32 m_minimumBufferAllocSize;
 
 	std::vector<AllocatorBuffer_t> m_buffers;
@@ -110,7 +133,7 @@ class MetalSynchronizedHeapAllocator
 	};
 
 public:
-	MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, size_t minimumBufferAllocSize);
+	MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, options, minimumBufferAllocSize) {}
 	MetalSynchronizedHeapAllocator(const MetalSynchronizedHeapAllocator&) = delete; // disallow copy
 
 	struct AllocatorReservation
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
index 45a06139..7b1dd53f 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
@@ -77,6 +77,7 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
 
 	auto allocation = m_stagingAllocator.AllocateBufferMemory(size, 1);
 	memcpy(allocation.memPtr, data, size);
+	m_stagingAllocator.FlushReservation(allocation);
 
 	blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size);
 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
index 4f040337..4e55fa6f 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
@@ -7,7 +7,7 @@
 class MetalMemoryManager
 {
 public:
-	MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr/*, m_mtlr->GetOptimalBufferStorageMode()*/, 32u * 1024 * 1024), m_indexAllocator(m_mtlr/*, m_mtlr->GetOptimalBufferStorageMode()*/, 4u * 1024 * 1024) {}
+	MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 32u * 1024 * 1024), m_indexAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 4u * 1024 * 1024) {}
 	~MetalMemoryManager();
 
 	MetalSynchronizedRingAllocator& GetStagingAllocator()
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
index 45bc967c..61e5c94a 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
@@ -702,6 +702,7 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s
 	// Allocate a temporary buffer
 	auto& bufferAllocator = m_memoryManager->GetStagingAllocator();
 	auto allocation = bufferAllocator.AllocateBufferMemory(compressedImageSize, 1);
+	bufferAllocator.FlushReservation(allocation);
 
 	// Copy the data to the temporary buffer
 	memcpy(allocation.memPtr, pixelData, compressedImageSize);
@@ -2092,12 +2093,11 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
 		size_t size = shader->uniform.uniformRangeSize;
 		auto& bufferAllocator = m_memoryManager->GetStagingAllocator();
-		auto supportBuffer = bufferAllocator.AllocateBufferMemory(size, 1);
-		memcpy(supportBuffer.memPtr, supportBufferData, size);
-		//if (!HasUnifiedMemory())
-		//	buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
+		auto allocation = bufferAllocator.AllocateBufferMemory(size, 1);
+		memcpy(allocation.memPtr, supportBufferData, size);
+		bufferAllocator.FlushReservation(allocation);
 
-		SetBuffer(renderCommandEncoder, mtlShaderType, supportBuffer.mtlBuffer, supportBuffer.bufferOffset, shader->resourceMapping.uniformVarsBufferBindingPoint);
+		SetBuffer(renderCommandEncoder, mtlShaderType, allocation.mtlBuffer, allocation.bufferOffset, shader->resourceMapping.uniformVarsBufferBindingPoint);
 	}
 
 	// Uniform buffers