Mirror of https://github.com/cemu-project/Cemu.git (synced 2025-07-05 14:31:17 +12:00)
Merge pull request #19 from SamoZ256/metal-allocator-rework

Allocator rework

Commit f0cf61461c: 9 changed files with 445 additions and 450 deletions
CMakeLists.txt

@@ -555,6 +555,7 @@ if(ENABLE_METAL)
     HW/Latte/Renderer/Metal/CachedFBOMtl.h
     HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
     HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
+    HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
     HW/Latte/Renderer/Metal/MetalBufferAllocator.h
     HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
     HW/Latte/Renderer/Metal/MetalMemoryManager.h
src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp (new file, 217 lines)

@@ -0,0 +1,217 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"

MetalBufferChunkedHeap::~MetalBufferChunkedHeap()
{
    for (auto& chunk : m_chunkBuffers)
        chunk->release();
}

uint32 MetalBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize)
{
    size_t allocationSize = std::max<size_t>(m_minimumBufferAllocationSize, minimumAllocationSize);

    MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options);
    cemu_assert_debug(buffer);
    cemu_assert_debug(m_chunkBuffers.size() == chunkIndex);
    m_chunkBuffers.emplace_back(buffer);

    return allocationSize;
}

void MetalSynchronizedRingAllocator::addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset)
{
    auto commandBuffer = m_mtlr->GetCurrentCommandBuffer();
    if (commandBuffer == buffer.lastSyncpointCommandBuffer)
        return;
    buffer.lastSyncpointCommandBuffer = commandBuffer;
    buffer.queue_syncPoints.emplace(commandBuffer, offset);
}

void MetalSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc)
{
    // calculate buffer size, should be a multiple of bufferAllocSize that is at least as large as sizeRequiredForAlloc
    uint32 bufferAllocSize = m_minimumBufferAllocSize;
    while (bufferAllocSize < sizeRequiredForAlloc)
        bufferAllocSize += m_minimumBufferAllocSize;

    AllocatorBuffer_t newBuffer{};
    newBuffer.writeIndex = 0;
    newBuffer.basePtr = nullptr;
    newBuffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(bufferAllocSize, m_options);
    newBuffer.basePtr = (uint8*)newBuffer.mtlBuffer->contents();
    newBuffer.size = bufferAllocSize;
    newBuffer.index = (uint32)m_buffers.size();
    m_buffers.push_back(newBuffer);
}

MetalSynchronizedRingAllocator::AllocatorReservation_t MetalSynchronizedRingAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
{
    if (alignment < 128)
        alignment = 128;
    size = (size + 127) & ~127;

    for (auto& itr : m_buffers)
    {
        // align pointer
        uint32 alignmentPadding = (alignment - (itr.writeIndex % alignment)) % alignment;
        uint32 distanceToSyncPoint;
        if (!itr.queue_syncPoints.empty())
        {
            if (itr.queue_syncPoints.front().offset < itr.writeIndex)
                distanceToSyncPoint = 0xFFFFFFFF;
            else
                distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex;
        }
        else
            distanceToSyncPoint = 0xFFFFFFFF;
        uint32 spaceNeeded = alignmentPadding + size;
        if (spaceNeeded > distanceToSyncPoint)
            continue; // not enough space in current buffer
        if ((itr.writeIndex + spaceNeeded) > itr.size)
        {
            // wrap-around
            spaceNeeded = size;
            alignmentPadding = 0;
            // check if there is enough space in current buffer after wrap-around
            if (!itr.queue_syncPoints.empty())
            {
                distanceToSyncPoint = itr.queue_syncPoints.front().offset - 0;
                if (spaceNeeded > distanceToSyncPoint)
                    continue;
            }
            else if (spaceNeeded > itr.size)
                continue;
            itr.writeIndex = 0;
        }
        addUploadBufferSyncPoint(itr, itr.writeIndex);
        itr.writeIndex += alignmentPadding;
        uint32 offset = itr.writeIndex;
        itr.writeIndex += size;
        itr.cleanupCounter = 0;
        MetalSynchronizedRingAllocator::AllocatorReservation_t res;
        res.mtlBuffer = itr.mtlBuffer;
        res.memPtr = itr.basePtr + offset;
        res.bufferOffset = offset;
        res.size = size;
        res.bufferIndex = itr.index;

        return res;
    }

    // allocate new buffer
    allocateAdditionalUploadBuffer(size);

    return AllocateBufferMemory(size, alignment);
}

void MetalSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation)
{
    if (RequiresFlush())
    {
        uploadReservation.mtlBuffer->didModifyRange(NS::Range(uploadReservation.bufferOffset, uploadReservation.size));
    }
}

void MetalSynchronizedRingAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
    for (auto& itr : m_buffers)
    {
        while (!itr.queue_syncPoints.empty() && latestFinishedCommandBuffer == itr.queue_syncPoints.front().commandBuffer)
        {
            itr.queue_syncPoints.pop();
        }
        if (itr.queue_syncPoints.empty())
            itr.cleanupCounter++;
    }

    // check if last buffer is available for deletion
    if (m_buffers.size() >= 2)
    {
        auto& lastBuffer = m_buffers.back();
        if (lastBuffer.cleanupCounter >= 1000)
        {
            // release buffer
            lastBuffer.mtlBuffer->release();
            m_buffers.pop_back();
        }
    }
}

MTL::Buffer* MetalSynchronizedRingAllocator::GetBufferByIndex(uint32 index) const
{
    return m_buffers[index].mtlBuffer;
}

void MetalSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
    numBuffers = (uint32)m_buffers.size();
    totalBufferSize = 0;
    freeBufferSize = 0;
    for (auto& itr : m_buffers)
    {
        totalBufferSize += itr.size;
        // calculate free space in buffer
        uint32 distanceToSyncPoint;
        if (!itr.queue_syncPoints.empty())
        {
            if (itr.queue_syncPoints.front().offset < itr.writeIndex)
                distanceToSyncPoint = (itr.size - itr.writeIndex) + itr.queue_syncPoints.front().offset; // size with wrap-around
            else
                distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex;
        }
        else
            distanceToSyncPoint = itr.size;
        freeBufferSize += distanceToSyncPoint;
    }
}

/* MetalSynchronizedHeapAllocator */

MetalSynchronizedHeapAllocator::AllocatorReservation* MetalSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
{
    CHAddr addr = m_chunkedHeap.alloc(size, alignment);
    m_activeAllocations.emplace_back(addr);
    AllocatorReservation* res = m_poolAllocatorReservation.allocObj();
    res->bufferIndex = addr.chunkIndex;
    res->bufferOffset = addr.offset;
    res->size = size;
    res->mtlBuffer = m_chunkedHeap.GetBufferByIndex(addr.chunkIndex);
    res->memPtr = m_chunkedHeap.GetChunkPtr(addr.chunkIndex) + addr.offset;

    return res;
}

void MetalSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploadReservation)
{
    // put the allocation on a delayed release queue for the current command buffer
    MTL::CommandBuffer* currentCommandBuffer = m_mtlr->GetCurrentCommandBuffer();
    auto it = std::find_if(m_activeAllocations.begin(), m_activeAllocations.end(), [&uploadReservation](const TrackedAllocation& allocation) { return allocation.allocation.chunkIndex == uploadReservation->bufferIndex && allocation.allocation.offset == uploadReservation->bufferOffset; });
    cemu_assert_debug(it != m_activeAllocations.end());
    m_releaseQueue[currentCommandBuffer].emplace_back(it->allocation);
    m_activeAllocations.erase(it);
    m_poolAllocatorReservation.freeObj(uploadReservation);
}

void MetalSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation)
{
    if (m_chunkedHeap.RequiresFlush())
    {
        uploadReservation->mtlBuffer->didModifyRange(NS::Range(uploadReservation->bufferOffset, uploadReservation->size));
    }
}

void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
    auto it = m_releaseQueue.find(latestFinishedCommandBuffer);
    if (it == m_releaseQueue.end())
        return;

    // release allocations
    for (auto& addr : it->second)
        m_chunkedHeap.free(addr);
    m_releaseQueue.erase(it);
}

void MetalSynchronizedHeapAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
    m_chunkedHeap.GetStats(numBuffers, totalBufferSize, freeBufferSize);
}
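The heart of the new staging path is the ring allocator's reuse policy above: each backing buffer keeps a FIFO of (command buffer, offset) sync points, an allocation may only advance the write cursor up to the oldest unretired sync point, and the cursor wraps to offset 0 when it reaches the end of the buffer. Below is a self-contained toy model of exactly that decision logic, with plain integer ids standing in for MTL::CommandBuffer and no real GPU memory; it is illustrative only, not code from this PR.

#include <cstdint>
#include <cstdio>
#include <queue>

// Toy model of MetalSynchronizedRingAllocator's per-buffer policy.
struct SyncPoint { int cmdBuf; uint32_t offset; };

struct RingBuffer
{
    uint32_t size;
    uint32_t writeIndex = 0;
    int lastSyncCmdBuf = -1;               // mirrors lastSyncpointCommandBuffer
    std::queue<SyncPoint> syncPoints;      // offsets the GPU may still be reading
};

// Try to carve `size` bytes out of `buf` for command buffer `cmdBuf`;
// returns the offset, or UINT32_MAX if the request does not fit before the
// oldest unretired sync point (the real allocator then tries the next buffer
// or allocates an additional one).
uint32_t TryAllocate(RingBuffer& buf, int cmdBuf, uint32_t size, uint32_t alignment)
{
    uint32_t padding = (alignment - (buf.writeIndex % alignment)) % alignment;
    uint32_t distance = UINT32_MAX;
    if (!buf.syncPoints.empty() && buf.syncPoints.front().offset >= buf.writeIndex)
        distance = buf.syncPoints.front().offset - buf.writeIndex;
    uint32_t needed = padding + size;
    if (needed > distance)
        return UINT32_MAX; // would overwrite data the GPU may still read
    if (buf.writeIndex + needed > buf.size) // wrap back to offset 0
    {
        needed = size;
        padding = 0;
        if (!buf.syncPoints.empty() && needed > buf.syncPoints.front().offset)
            return UINT32_MAX;
        if (buf.syncPoints.empty() && needed > buf.size)
            return UINT32_MAX;
        buf.writeIndex = 0;
    }
    // at most one sync point per (buffer, command buffer) pair, recorded pre-write
    if (buf.lastSyncCmdBuf != cmdBuf)
    {
        buf.lastSyncCmdBuf = cmdBuf;
        buf.syncPoints.push({cmdBuf, buf.writeIndex});
    }
    buf.writeIndex += padding;
    uint32_t offset = buf.writeIndex;
    buf.writeIndex += size;
    return offset;
}

// Retire sync points once their command buffer has completed on the GPU.
void Cleanup(RingBuffer& buf, int finishedCmdBuf)
{
    while (!buf.syncPoints.empty() && buf.syncPoints.front().cmdBuf == finishedCmdBuf)
        buf.syncPoints.pop();
}

int main()
{
    RingBuffer buf{4096};
    printf("%u\n", (unsigned)TryAllocate(buf, /*cmdBuf*/ 1, 1000, 128)); // 0
    printf("%u\n", (unsigned)TryAllocate(buf, 1, 1000, 128));            // 1024
    Cleanup(buf, 1); // everything written under command buffer 1 becomes reusable
}

Recording at most one sync point per (buffer, command buffer) pair keeps the queue short: retiring a single completed command buffer frees every byte written under it with one pop.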
src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h

@@ -1,354 +1,163 @@
 #pragma once
 
 #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
+#include "Common/precompiled.h"
+#include "Metal/MTLResource.hpp"
+#include "util/ChunkedHeap/ChunkedHeap.h"
+#include "util/helpers/MemoryPool.h"
-#include <utility>
 
-struct MetalBufferRange
+inline MTL::ResourceOptions GetResourceOptions(MTL::ResourceOptions options)
 {
-    size_t offset;
-    size_t size;
+    if (options & MTL::ResourceStorageModeShared || options & MTL::ResourceStorageModeManaged)
+        options |= MTL::ResourceCPUCacheModeWriteCombined;
+
+    return options;
 }
 
+class MetalBufferChunkedHeap : private ChunkedHeap<>
+{
+public:
+    MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { };
+    ~MetalBufferChunkedHeap();
+
+    using ChunkedHeap::alloc;
+    using ChunkedHeap::free;
+
+    uint8* GetChunkPtr(uint32 index) const
+    {
+        if (index >= m_chunkBuffers.size())
+            return nullptr;
+
+        return (uint8*)m_chunkBuffers[index]->contents();
+    }
+
+    MTL::Buffer* GetBufferByIndex(uint32 index) const
+    {
+        cemu_assert_debug(index < m_chunkBuffers.size());
+
+        return m_chunkBuffers[index];
+    }
+
+    bool RequiresFlush() const
+    {
+        return m_options & MTL::ResourceStorageModeManaged;
+    }
+
+    void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
+    {
+        numBuffers = m_chunkBuffers.size();
+        totalBufferSize = m_numHeapBytes;
+        freeBufferSize = m_numHeapBytes - m_numAllocatedBytes;
+    }
+
+private:
+    uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override;
+
+    const class MetalRenderer* m_mtlr;
+
+    MTL::ResourceOptions m_options;
+    size_t m_minimumBufferAllocationSize;
+
+    std::vector<MTL::Buffer*> m_chunkBuffers;
+};
+
-constexpr size_t BASE_ALLOCATION_SIZE = 8 * 1024 * 1024; // 8 MB
-constexpr size_t MAX_ALLOCATION_SIZE = 64 * 1024 * 1024; // 64 MB
+void LatteIndices_invalidateAll();
 
-template<typename BufferT>
-class MetalBufferAllocator
+// a circular ring-buffer which tracks and releases memory per command-buffer
+class MetalSynchronizedRingAllocator
 {
 public:
-    struct Buffer
+    MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
+    MetalSynchronizedRingAllocator(const MetalSynchronizedRingAllocator&) = delete; // disallow copy
+
+    struct BufferSyncPoint_t
+    {
+        // todo - modularize sync point
+        MTL::CommandBuffer* commandBuffer;
+        uint32 offset;
+
+        BufferSyncPoint_t(MTL::CommandBuffer* _commandBuffer, uint32 _offset) : commandBuffer(_commandBuffer), offset(_offset) {};
+    };
+
+    struct AllocatorBuffer_t
+    {
+        MTL::Buffer* mtlBuffer;
+        uint8* basePtr;
+        uint32 size;
+        uint32 writeIndex;
+        std::queue<BufferSyncPoint_t> queue_syncPoints;
+        MTL::CommandBuffer* lastSyncpointCommandBuffer{ nullptr };
+        uint32 index;
+        uint32 cleanupCounter{ 0 }; // increased by one every time CleanupBuffer() is called if there is no sync point. If it reaches 1000 then the buffer is released
+    };
+
+    struct AllocatorReservation_t
+    {
+        MTL::Buffer* mtlBuffer;
+        uint8* memPtr;
+        uint32 bufferOffset;
+        uint32 size;
+        uint32 bufferIndex;
+    };
+
+    AllocatorReservation_t AllocateBufferMemory(uint32 size, uint32 alignment);
+    void FlushReservation(AllocatorReservation_t& uploadReservation);
+    void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
+    MTL::Buffer* GetBufferByIndex(uint32 index) const;
+
+    bool RequiresFlush() const
     {
-        MTL::Buffer* m_buffer;
-        std::vector<MetalBufferRange> m_freeRanges;
-        BufferT m_data;
-    };
-
-    MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer} {
-        m_isCPUAccessible = (storageMode == MTL::ResourceStorageModeShared) || (storageMode == MTL::ResourceStorageModeManaged);
-
-        m_options = storageMode;
-        if (m_isCPUAccessible)
-            m_options |= MTL::ResourceCPUCacheModeWriteCombined;
+        return m_options & MTL::ResourceStorageModeManaged;
     }
 
-    ~MetalBufferAllocator()
-    {
-        for (auto buffer : m_buffers)
-        {
-            buffer.m_buffer->release();
-        }
-    }
-
-    void ResetAllocations()
-    {
-        for (uint32 i = 0; i < m_buffers.size(); i++)
-            FreeBuffer(i);
-    }
-
-    MTL::Buffer* GetBuffer(uint32 bufferIndex)
-    {
-        return m_buffers[bufferIndex].m_buffer;
-    }
-
-    MetalBufferAllocation GetBufferAllocation(size_t size)
-    {
-        // Align the size
-        size = Align(size, 128);
-
-        // First, try to find a free range
-        for (uint32 i = 0; i < m_buffers.size(); i++)
-        {
-            auto& buffer = m_buffers[i];
-            for (uint32 j = 0; j < buffer.m_freeRanges.size(); j++)
-            {
-                auto& range = buffer.m_freeRanges[j];
-                if (size <= range.size)
-                {
-                    MetalBufferAllocation allocation;
-                    allocation.bufferIndex = i;
-                    allocation.offset = range.offset;
-                    allocation.size = size;
-                    allocation.data = (m_isCPUAccessible ? (uint8*)buffer.m_buffer->contents() + range.offset : nullptr);
-
-                    range.offset += size;
-                    range.size -= size;
-
-                    if (range.size == 0)
-                    {
-                        buffer.m_freeRanges.erase(buffer.m_freeRanges.begin() + j);
-                    }
-
-                    return allocation;
-                }
-            }
-        }
-
-        // If no free range was found, allocate a new buffer
-        size_t allocationSize = BASE_ALLOCATION_SIZE * (1u << m_buffers.size());
-        allocationSize = std::min(allocationSize, MAX_ALLOCATION_SIZE); // Limit the allocation size
-        allocationSize = std::max(allocationSize, size);
-        MTL::Buffer* mtlBuffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options);
-#ifdef CEMU_DEBUG_ASSERT
-        mtlBuffer->setLabel(GetLabel("Buffer from buffer allocator", mtlBuffer));
-#endif
-
-        MetalBufferAllocation allocation;
-        allocation.bufferIndex = m_buffers.size();
-        allocation.offset = 0;
-        allocation.size = size;
-        allocation.data = (m_isCPUAccessible ? mtlBuffer->contents() : nullptr);
-
-        m_buffers.push_back({mtlBuffer});
-        auto& buffer = m_buffers.back();
-
-        // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
-        if (size < allocationSize)
-        {
-            MetalBufferRange range;
-            range.offset = size;
-            range.size = allocationSize - size;
-
-            buffer.m_freeRanges.push_back(range);
-        }
-
-        // Debug
-        m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory += allocationSize;
-
-        return allocation;
-    }
-
-    void FreeAllocation(MetalBufferAllocation& allocation)
-    {
-        MetalBufferRange range;
-        range.offset = allocation.offset;
-        range.size = allocation.size;
-
-        allocation.offset = INVALID_OFFSET;
-
-        // Find the correct position to insert the free range
-        auto& buffer = m_buffers[allocation.bufferIndex];
-        for (uint32 i = 0; i < buffer.m_freeRanges.size(); i++)
-        {
-            auto& freeRange = buffer.m_freeRanges[i];
-            if (freeRange.offset + freeRange.size == range.offset)
-            {
-                freeRange.size += range.size;
-                return;
-            }
-        }
-
-        buffer.m_freeRanges.push_back(range);
-    }
-
-protected:
-    class MetalRenderer* m_mtlr;
-
-    // TODO: make these template arguments
-    bool m_isCPUAccessible;
-    MTL::ResourceOptions m_options;
-
-    std::vector<Buffer> m_buffers;
-
-    void FreeBuffer(uint32 bufferIndex)
-    {
-        auto& buffer = m_buffers[bufferIndex];
-        buffer.m_freeRanges.clear();
-        buffer.m_freeRanges.push_back({0, buffer.m_buffer->length()});
-    }
-};
-
-struct Empty {};
-typedef MetalBufferAllocator<Empty> MetalDefaultBufferAllocator;
-
-struct MetalSyncedBuffer
-{
-    uint32 m_commandBufferCount = 0;
-    MTL::CommandBuffer* m_lastCommandBuffer = nullptr;
-    uint32 m_lock = 0;
-
-    bool IsLocked() const
-    {
-        return (m_lock != 0);
-    }
-};
-
-constexpr uint16 BUFFER_RELEASE_FRAME_TRESHOLD = 1024;
-
-class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
-{
-public:
-    MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer, MTL::ResourceStorageModeShared) {}
-
-    void LockBuffer(uint32 bufferIndex)
-    {
-        m_buffers[bufferIndex].m_data.m_lock++;
-    }
-
-    void UnlockBuffer(uint32 bufferIndex)
-    {
-        auto& buffer = m_buffers[bufferIndex];
-
-        buffer.m_data.m_lock--;
-
-        // Release the buffer if it wasn't released due to the lock
-        if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBufferCount == 0)
-            FreeBuffer(bufferIndex);
-    }
-
-    void EndFrame()
-    {
-        // Unlock all buffers
-        for (uint32_t i = 0; i < m_buffers.size(); i++)
-        {
-            auto& buffer = m_buffers[i];
-
-            if (buffer.m_data.IsLocked())
-            {
-                if (buffer.m_data.m_commandBufferCount == 0)
-                    FreeBuffer(i);
-
-                buffer.m_data.m_lock = 0;
-            }
-        }
-
-        // TODO: do this for other buffer allocators as well?
-        // Track how many frames have passed since the last access to the back buffer
-        if (!m_buffers.empty())
-        {
-            auto& backBuffer = m_buffers.back();
-            if (backBuffer.m_data.m_commandBufferCount == 0)
-            {
-                // Release the back buffer if it hasn't been accessed for a while
-                if (m_framesSinceBackBufferAccess >= BUFFER_RELEASE_FRAME_TRESHOLD)
-                {
-                    // Debug
-                    m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory -= backBuffer.m_buffer->length();
-
-                    backBuffer.m_buffer->release();
-                    m_buffers.pop_back();
-
-                    m_framesSinceBackBufferAccess = 0;
-                }
-                else
-                {
-                    m_framesSinceBackBufferAccess++;
-                }
-            }
-            else
-            {
-                m_framesSinceBackBufferAccess = 0;
-            }
-        }
-    }
-
-    void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
-    {
-        m_activeCommandBuffer = commandBuffer;
-        if (commandBuffer)
-        {
-            auto result = m_executingCommandBuffers.emplace(std::make_pair(m_activeCommandBuffer, std::vector<uint32>{}));
-            cemu_assert_debug(result.second);
-            m_activeCommandBufferIt = result.first;
-        }
-        else
-        {
-            m_activeCommandBufferIt = m_executingCommandBuffers.end();
-        }
-    }
-
-    void CommandBufferFinished(MTL::CommandBuffer* commandBuffer)
-    {
-        auto it = m_executingCommandBuffers.find(commandBuffer);
-        for (auto bufferIndex : it->second)
-        {
-            auto& buffer = m_buffers[bufferIndex];
-            buffer.m_data.m_commandBufferCount--;
-
-            // TODO: is this neccessary?
-            if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBufferCount == 0)
-                FreeBuffer(bufferIndex);
-        }
-
-        m_executingCommandBuffers.erase(it);
-    }
-
-    MTL::Buffer* GetBuffer(uint32 bufferIndex)
-    {
-        cemu_assert_debug(m_activeCommandBuffer);
-
-        auto& buffer = m_buffers[bufferIndex];
-        if (buffer.m_data.m_commandBufferCount == 0 || buffer.m_data.m_lastCommandBuffer != m_activeCommandBuffer)
-        {
-            m_activeCommandBufferIt->second.push_back(bufferIndex);
-            buffer.m_data.m_commandBufferCount++;
-            buffer.m_data.m_lastCommandBuffer = m_activeCommandBuffer;
-        }
-
-        return buffer.m_buffer;
-    }
-
-    MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex)
-    {
-        return m_buffers[bufferIndex].m_buffer;
-    }
-
-    /*
-    MetalBufferAllocation GetBufferAllocation(size_t size)
-    {
-        if (!m_activeCommandBuffer)
-            throw std::runtime_error("No active command buffer when allocating a buffer!");
-
-        auto allocation = MetalBufferAllocator<MetalSyncedBuffer>::GetBufferAllocation(size);
-
-        auto& buffer = m_buffers[allocation.bufferIndex];
-        if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer)
-            buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
-
-        return allocation;
-    }
-    */
-
-    // For debugging
-    /*
-    void LogInfo()
-    {
-        debug_printf("BUFFERS:\n");
-        for (auto& buffer : m_buffers)
-        {
-            debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_data.m_commandBuffers.size());
-            uint32 same = 0;
-            uint32 completed = 0;
-            for (uint32 i = 0; i < buffer.m_data.m_commandBuffers.size(); i++)
-            {
-                if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[i]))
-                    completed++;
-                for (uint32 j = 0; j < buffer.m_data.m_commandBuffers.size(); j++)
-                {
-                    if (i != j && buffer.m_data.m_commandBuffers[i] == buffer.m_data.m_commandBuffers[j])
-                        same++;
-                }
-            }
-            debug_printf(" same: %u\n", same);
-            debug_printf(" completed: %u\n", completed);
-
-            debug_printf(" FREE RANGES:\n");
-            for (auto& range : buffer.m_freeRanges)
-            {
-                debug_printf("  offset: %zu, size: %zu\n", range.offset, range.size);
-            }
-        }
-    }
-    */
+    void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
 
 private:
-    MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
+    void allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc);
+    void addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset);
 
-    std::map<MTL::CommandBuffer*, std::vector<uint32>> m_executingCommandBuffers;
-    std::map<MTL::CommandBuffer*, std::vector<uint32>>::iterator m_activeCommandBufferIt;
+    const class MetalRenderer* m_mtlr;
 
-    uint16 m_framesSinceBackBufferAccess = 0;
+    MTL::ResourceOptions m_options;
+    const uint32 m_minimumBufferAllocSize;
+
+    std::vector<AllocatorBuffer_t> m_buffers;
 };
+
+// heap style allocator with released memory being freed after the current command buffer finishes
+class MetalSynchronizedHeapAllocator
+{
+    struct TrackedAllocation
+    {
+        TrackedAllocation(CHAddr allocation) : allocation(allocation) {};
+        CHAddr allocation;
+    };
+
+public:
+    MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, options, minimumBufferAllocSize) {}
+    MetalSynchronizedHeapAllocator(const MetalSynchronizedHeapAllocator&) = delete; // disallow copy
+
+    struct AllocatorReservation
+    {
+        MTL::Buffer* mtlBuffer;
+        uint8* memPtr;
+        uint32 bufferOffset;
+        uint32 size;
+        uint32 bufferIndex;
+    };
+
+    AllocatorReservation* AllocateBufferMemory(uint32 size, uint32 alignment);
+    void FreeReservation(AllocatorReservation* uploadReservation);
+    void FlushReservation(AllocatorReservation* uploadReservation);
+
+    void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
+
+    void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
+
+private:
+    const class MetalRenderer* m_mtlr;
+    MetalBufferChunkedHeap m_chunkedHeap;
+    // allocations
+    std::vector<TrackedAllocation> m_activeAllocations;
+    MemoryPool<AllocatorReservation> m_poolAllocatorReservation{32};
+    // release queue
+    std::unordered_map<MTL::CommandBuffer*, std::vector<CHAddr>> m_releaseQueue;
+};
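MetalSynchronizedHeapAllocator pairs the chunked heap with a deferred-free queue: FreeReservation() does not return memory to the heap immediately but parks the address under whatever command buffer is current at the time, and CleanupBuffer() returns it to the heap only once that command buffer has completed. A self-contained toy of that scheme (integer command-buffer ids, a plain free list standing in for ChunkedHeap; illustrative, not PR code):

#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

// Toy model of MetalSynchronizedHeapAllocator's deferred-free scheme.
struct DeferredFreeHeap
{
    std::vector<uint32_t> freeList;                       // stands in for ChunkedHeap
    std::unordered_map<int, std::vector<uint32_t>> releaseQueue;

    void FreeReservation(uint32_t addr, int currentCmdBuf)
    {
        releaseQueue[currentCmdBuf].push_back(addr);      // queued, not reusable yet
    }

    void CleanupBuffer(int finishedCmdBuf)
    {
        auto it = releaseQueue.find(finishedCmdBuf);
        if (it == releaseQueue.end())
            return;
        for (uint32_t addr : it->second)
            freeList.push_back(addr);                     // ChunkedHeap::free()
        releaseQueue.erase(it);
    }
};

int main()
{
    DeferredFreeHeap heap;
    heap.FreeReservation(0x1000, /*cmdBuf*/ 7);
    printf("free before completion: %zu\n", heap.freeList.size()); // 0
    heap.CleanupBuffer(7);
    printf("free after completion:  %zu\n", heap.freeList.size()); // 1
}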
src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp

@@ -73,20 +73,15 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
 
     if (m_bufferCacheMode == BufferCacheMode::DevicePrivate)
     {
-        auto allocation = m_tempBufferAllocator.GetBufferAllocation(size);
-        auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex);
-        memcpy((uint8*)buffer->contents() + allocation.offset, data, size);
+        auto blitCommandEncoder = m_mtlr->GetBlitCommandEncoder();
 
-        // Lock the buffer to make sure it's not deallocated before the copy is done
-        m_tempBufferAllocator.LockBuffer(allocation.bufferIndex);
+        auto allocation = m_stagingAllocator.AllocateBufferMemory(size, 1);
+        memcpy(allocation.memPtr, data, size);
+        m_stagingAllocator.FlushReservation(allocation);
 
-        m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
+        blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size);
 
-        // Make sure the buffer has the right command buffer
-        m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this
-
-        // We can now safely unlock the buffer
-        m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex);
+        //m_mtlr->CopyBufferToBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
     }
     else
     {
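The rewritten DevicePrivate path works the way Metal requires: a device-private buffer has no CPU mapping, so the data is first written into a CPU-visible reservation from the staging ring, flushed if the storage mode is managed, and then copied on the GPU timeline with a blit encoder. The lock/unlock dance of the old temporary allocator disappears because the ring's sync points already keep the staging memory alive until the copy's command buffer completes.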
src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h

@@ -7,22 +7,17 @@
 class MetalMemoryManager
 {
 public:
-    MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer) {}
+    MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 32u * 1024 * 1024), m_indexAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 4u * 1024 * 1024) {}
     ~MetalMemoryManager();
 
-    MetalDefaultBufferAllocator& GetBufferAllocator()
+    MetalSynchronizedRingAllocator& GetStagingAllocator()
     {
-        return m_bufferAllocator;
+        return m_stagingAllocator;
     }
 
-    MetalDefaultBufferAllocator& GetFramePersistentBufferAllocator()
-    {
-        return m_framePersistentBufferAllocator;
-    }
-
-    MetalTemporaryBufferAllocator& GetTemporaryBufferAllocator()
+    MetalSynchronizedHeapAllocator& GetIndexAllocator()
     {
-        return m_tempBufferAllocator;
+        return m_indexAllocator;
     }
 
     MTL::Buffer* GetBufferCache()

@@ -30,6 +25,12 @@ public:
         return m_bufferCache;
     }
 
+    void CleanupBuffers(MTL::CommandBuffer* latestFinishedCommandBuffer)
+    {
+        m_stagingAllocator.CleanupBuffer(latestFinishedCommandBuffer);
+        m_indexAllocator.CleanupBuffer(latestFinishedCommandBuffer);
+    }
+
     // Texture upload buffer
     void* AcquireTextureUploadBuffer(size_t size);
     void ReleaseTextureUploadBuffer(uint8* mem);

@@ -65,9 +66,8 @@ private:
 
     std::vector<uint8> m_textureUploadBuffer;
 
-    MetalDefaultBufferAllocator m_bufferAllocator;
-    MetalDefaultBufferAllocator m_framePersistentBufferAllocator;
-    MetalTemporaryBufferAllocator m_tempBufferAllocator;
+    MetalSynchronizedRingAllocator m_stagingAllocator;
+    MetalSynchronizedHeapAllocator m_indexAllocator;
 
     MTL::Buffer* m_bufferCache = nullptr;
     BufferCacheMode m_bufferCacheMode;
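Net effect on the memory manager: the three old allocators (m_bufferAllocator, m_framePersistentBufferAllocator, m_tempBufferAllocator) are replaced by exactly two synchronized ones, a staging ring with 32 MiB minimum buffers and an index heap with 4 MiB minimum chunks, and per-frame cleanup (ResetAllocations()/EndFrame()) gives way to per-command-buffer cleanup through CleanupBuffers().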
src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h

@@ -3,8 +3,6 @@
 class MetalPerformanceMonitor
 {
 public:
-    size_t m_bufferAllocatorMemory = 0;
-
     // Per frame data
     uint32 m_commandBuffers = 0;
     uint32 m_renderPasses = 0;
src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp

@@ -16,13 +16,12 @@
 
 #include "Cafe/HW/Latte/Core/LatteShader.h"
 #include "Cafe/HW/Latte/Core/LatteIndices.h"
-#include "Cemu/Logging/CemuDebugLogging.h"
+#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
 #include "Cemu/Logging/CemuLogging.h"
 #include "Cafe/HW/Latte/Core/FetchShader.h"
 #include "Cafe/HW/Latte/Core/LatteConst.h"
 #include "Common/precompiled.h"
 #include "HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
 #include "HW/Latte/Renderer/Metal/MetalCommon.h"
 #include "Metal/MTLCaptureManager.hpp"
 #include "config/CemuConfig.h"
 #include "gui/guiWrapper.h"
@@ -191,6 +190,7 @@ MetalRenderer::MetalRenderer()
     utilityLibrary->release();
 
     // HACK: for some reason, this variable ends up being initialized to some garbage data, even though its declared as bool m_captureFrame = false;
+    m_occlusionQuery.m_lastCommandBuffer = nullptr;
     m_captureFrame = false;
 }
@@ -302,12 +302,6 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)
     // Reset the command buffers (they are released by TemporaryBufferAllocator)
     CommitCommandBuffer();
 
-    // Release frame persistent buffers
-    m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations();
-
-    // Unlock all temporary buffers
-    m_memoryManager->GetTemporaryBufferAllocator().EndFrame();
-
     // Debug
     m_performanceMonitor.ResetPerFrameData();
@@ -593,7 +587,6 @@ void MetalRenderer::AppendOverlayDebugInfo()
 
     ImGui::Text("--- Metal info ---");
     ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize());
-    ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024);
 
     ImGui::Text("--- Metal info (per frame) ---");
     ImGui::Text("Command buffers %u", m_performanceMonitor.m_commandBuffers);
@@ -601,6 +594,31 @@ void MetalRenderer::AppendOverlayDebugInfo()
     ImGui::Text("Clears %u", m_performanceMonitor.m_clears);
     ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws);
     ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans);
+
+    ImGui::Text("--- Cache debug info ---");
+
+    uint32 bufferCacheHeapSize = 0;
+    uint32 bufferCacheAllocationSize = 0;
+    uint32 bufferCacheNumAllocations = 0;
+
+    LatteBufferCache_getStats(bufferCacheHeapSize, bufferCacheAllocationSize, bufferCacheNumAllocations);
+
+    ImGui::Text("Buffer");
+    ImGui::SameLine(60.0f);
+    ImGui::Text("%06uKB / %06uKB Allocs: %u", (uint32)(bufferCacheAllocationSize + 1023) / 1024, ((uint32)bufferCacheHeapSize + 1023) / 1024, (uint32)bufferCacheNumAllocations);
+
+    uint32 numBuffers;
+    size_t totalSize, freeSize;
+
+    m_memoryManager->GetStagingAllocator().GetStats(numBuffers, totalSize, freeSize);
+    ImGui::Text("Staging");
+    ImGui::SameLine(60.0f);
+    ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers);
+
+    m_memoryManager->GetIndexAllocator().GetStats(numBuffers, totalSize, freeSize);
+    ImGui::Text("Index");
+    ImGui::SameLine(60.0f);
+    ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers);
 }
 
 void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ)
@@ -682,17 +700,17 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s
     auto blitCommandEncoder = GetBlitCommandEncoder();
 
     // Allocate a temporary buffer
-    auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
-    auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize);
-    auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex);
+    auto& bufferAllocator = m_memoryManager->GetStagingAllocator();
+    auto allocation = bufferAllocator.AllocateBufferMemory(compressedImageSize, 1);
+    bufferAllocator.FlushReservation(allocation);
 
     // Copy the data to the temporary buffer
-    memcpy(allocation.data, pixelData, compressedImageSize);
-    //buffer->didModifyRange(NS::Range(allocation.offset, allocation.size));
+    memcpy(allocation.memPtr, pixelData, compressedImageSize);
 
     // TODO: specify blit options when copying to a depth stencil texture?
     // Copy the data from the temporary buffer to the texture
-    blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ));
+    blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ));
     //}
 }
@@ -1067,9 +1085,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
     uint32 hostIndexCount;
     uint32 indexMin = 0;
     uint32 indexMax = 0;
-    uint32 indexBufferOffset = 0;
-    uint32 indexBufferIndex = 0;
-    LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
+    Renderer::IndexAllocation indexAllocation;
+    LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexAllocation);
+    auto indexAllocationMtl = static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(indexAllocation.rendererInternal);
 
     // Buffer cache
     if (m_memoryManager->UseHostMemoryForCache())
@@ -1308,20 +1326,10 @@
     BindStageResources(renderCommandEncoder, pixelShader, usesGeometryShader);
 
     // Draw
-    MTL::Buffer* indexBuffer = nullptr;
-    if (hostIndexType != INDEX_TYPE::NONE)
-    {
-        auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
-        indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex);
-
-        // We have already retrieved the buffer, no need for it to be locked anymore
-        bufferAllocator.UnlockBuffer(indexBufferIndex);
-    }
-
     if (usesGeometryShader)
     {
-        if (indexBuffer)
-            SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding);
+        if (hostIndexType != INDEX_TYPE::NONE)
+            SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexAllocationMtl->mtlBuffer, indexAllocationMtl->bufferOffset, vertexShader->resourceMapping.indexBufferBinding);
 
         uint8 hostIndexTypeU8 = (uint8)hostIndexType;
         renderCommandEncoder->setObjectBytes(&hostIndexTypeU8, sizeof(hostIndexTypeU8), vertexShader->resourceMapping.indexTypeBinding);
@@ -1349,10 +1357,10 @@
     }
     else
     {
-        if (indexBuffer)
+        if (hostIndexType != INDEX_TYPE::NONE)
        {
             auto mtlIndexType = GetMtlIndexType(hostIndexType);
-            renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance);
+            renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexAllocationMtl->mtlBuffer, indexAllocationMtl->bufferOffset, instanceCount, baseVertex, baseInstance);
         }
         else
         {
@@ -1492,29 +1500,21 @@ void MetalRenderer::draw_handleSpecialState5()
     renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3));
 }
 
-void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
+Renderer::IndexAllocation MetalRenderer::indexData_reserveIndexMemory(uint32 size)
 {
-    auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
-    auto allocation = bufferAllocator.GetBufferAllocation(size);
-    offset = allocation.offset;
-    bufferIndex = allocation.bufferIndex;
+    auto allocation = m_memoryManager->GetIndexAllocator().AllocateBufferMemory(size, 128);
 
-    // Lock the buffer so that it doesn't get released
-    bufferAllocator.LockBuffer(allocation.bufferIndex);
-
-    return allocation.data;
+    return {allocation->memPtr, allocation};
 }
 
-void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size)
+void MetalRenderer::indexData_releaseIndexMemory(IndexAllocation& allocation)
 {
-    // Do nothing
-    /*
-    if (!HasUnifiedMemory())
-    {
-        auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex);
-        buffer->didModifyRange(NS::Range(offset, size));
-    }
-    */
+    m_memoryManager->GetIndexAllocator().FreeReservation(static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(allocation.rendererInternal));
+}
+
+void MetalRenderer::indexData_uploadIndexMemory(IndexAllocation& allocation)
+{
+    m_memoryManager->GetIndexAllocator().FlushReservation(static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(allocation.rendererInternal));
 }
 
 LatteQueryObject* MetalRenderer::occlusionQuery_create() {
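The index-memory interface is reshaped from a raw pointer plus (offset, bufferIndex) out-params into a Renderer::IndexAllocation value carrying the write pointer and an opaque rendererInternal handle (here a MetalSynchronizedHeapAllocator::AllocatorReservation*). A minimal mock showing the intended call order; the mock types and the data-pointer field name are assumptions for illustration, since IndexAllocation's definition lives in the common renderer code outside this diff:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Mock of the new index-memory interface (names assumed, not from the diff).
struct IndexAllocation
{
    void* mem;               // CPU-visible pointer for writing decoded indices
    void* rendererInternal;  // opaque allocator reservation
};

struct MockRenderer
{
    uint8_t storage[256];

    IndexAllocation indexData_reserveIndexMemory(uint32_t size)
    {
        (void)size;
        return {storage, nullptr};
    }
    void indexData_uploadIndexMemory(IndexAllocation&) { /* didModifyRange on managed storage */ }
    void indexData_releaseIndexMemory(IndexAllocation&) { /* deferred free via the index heap */ }
};

int main()
{
    MockRenderer renderer;
    uint16_t indices[3] = {0, 1, 2};

    // reserve -> write -> upload -> draw -> release
    IndexAllocation alloc = renderer.indexData_reserveIndexMemory(sizeof(indices));
    memcpy(alloc.mem, indices, sizeof(indices));
    renderer.indexData_uploadIndexMemory(alloc);
    // ... encode the draw ...
    renderer.indexData_releaseIndexMemory(alloc);
    printf("done\n");
}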
@@ -1652,9 +1652,6 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer()
         m_recordedDrawcalls = 0;
         m_commitTreshold = m_defaultCommitTreshlod;
 
-        // Notify memory manager about the new command buffer
-        m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer);
-
         // Debug
         m_performanceMonitor.m_commandBuffers++;
@@ -1835,8 +1832,6 @@ void MetalRenderer::CommitCommandBuffer()
 
         m_executingCommandBuffers.push_back(mtlCommandBuffer);
 
-        m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr);
-
         // Debug
         //m_commandQueue->insertDebugCaptureBoundary();
 }
@@ -1845,26 +1840,20 @@ void MetalRenderer::CommitCommandBuffer()
 
 void MetalRenderer::ProcessFinishedCommandBuffers()
 {
     // Check for finished command buffers
+    bool atLeastOneCompleted = false;
     for (auto it = m_executingCommandBuffers.begin(); it != m_executingCommandBuffers.end();)
     {
         auto commandBuffer = *it;
         if (CommandBufferCompleted(commandBuffer))
         {
-            m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer);
+            m_memoryManager->CleanupBuffers(commandBuffer);
             commandBuffer->release();
             it = m_executingCommandBuffers.erase(it);
+            atLeastOneCompleted = true;
         }
         else
         {
             ++it;
         }
     }
+
+    // Invalidate indices if at least one command buffer has completed
+    if (atLeastOneCompleted)
+        LatteIndices_invalidateAll();
 }
 
 bool MetalRenderer::AcquireDrawable(bool mainWindow)
@@ -2102,15 +2091,13 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
         }
     }
 
-    auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
     size_t size = shader->uniform.uniformRangeSize;
-    auto supportBuffer = bufferAllocator.GetBufferAllocation(size);
-    memcpy(supportBuffer.data, supportBufferData, size);
-    auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex);
-    //if (!HasUnifiedMemory())
-    //    buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
+    auto& bufferAllocator = m_memoryManager->GetStagingAllocator();
+    auto allocation = bufferAllocator.AllocateBufferMemory(size, 1);
+    memcpy(allocation.memPtr, supportBufferData, size);
+    bufferAllocator.FlushReservation(allocation);
 
-    SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint);
+    SetBuffer(renderCommandEncoder, mtlShaderType, allocation.mtlBuffer, allocation.bufferOffset, shader->resourceMapping.uniformVarsBufferBindingPoint);
 }
 
 // Uniform buffers
src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h

@@ -7,19 +7,6 @@
 #include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
 
-struct MetalBufferAllocation
-{
-    void* data;
-    uint32 bufferIndex;
-    size_t offset = INVALID_OFFSET;
-    size_t size;
-
-    bool IsValid() const
-    {
-        return offset != INVALID_OFFSET;
-    }
-};
-
 enum MetalGeneralShaderType
 {
     METAL_GENERAL_SHADER_TYPE_VERTEX,
@@ -271,8 +258,9 @@ public:
     void draw_handleSpecialState5();
 
     // index
-    void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override;
-    void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override;
+    IndexAllocation indexData_reserveIndexMemory(uint32 size) override;
+    void indexData_releaseIndexMemory(IndexAllocation& allocation) override;
+    void indexData_uploadIndexMemory(IndexAllocation& allocation) override;
 
     // occlusion queries
     LatteQueryObject* occlusionQuery_create() override;
@@ -294,14 +282,14 @@ public:
         return (m_currentCommandBuffer.m_commandBuffer && !m_currentCommandBuffer.m_commited);
     }
 
-    MTL::CommandBuffer* GetCurrentCommandBuffer()
+    MTL::CommandBuffer* GetCurrentCommandBuffer() const
     {
         cemu_assert_debug(m_currentCommandBuffer.m_commandBuffer);
 
         return m_currentCommandBuffer.m_commandBuffer;
     }
 
-    MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted()
+    MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted() const
     {
         // The command buffer has been commited and has finished execution
         if (m_currentCommandBuffer.m_commited && m_executingCommandBuffers.size() == 0)
src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp

@@ -15,8 +15,8 @@
 #define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512)
 
 static bool s_isLoadingShadersMtl{false};
-static bool s_hasRAMFilesystem{false};
-class FileCache* s_airCache{nullptr};
+//static bool s_hasRAMFilesystem{false};
+//class FileCache* s_airCache{nullptr};
 
 extern std::atomic_int g_compiled_shaders_total;
 extern std::atomic_int g_compiled_shaders_async;

@@ -190,6 +190,7 @@ void RendererShaderMtl::ShaderCacheLoading_end()
 void RendererShaderMtl::ShaderCacheLoading_Close()
 {
     // Close the AIR cache
+    /*
     if (s_airCache)
     {
         delete s_airCache;

@@ -197,7 +198,6 @@ void RendererShaderMtl::ShaderCacheLoading_Close()
     }
 
     // Close RAM filesystem
-    /*
     if (s_hasRAMFilesystem)
         executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH);
     */