Merge pull request #19 from SamoZ256/metal-allocator-rework

Allocator rework
SamoZ256 2025-01-18 19:18:24 +01:00 committed by GitHub
commit f0cf61461c
9 changed files with 445 additions and 450 deletions


@ -555,6 +555,7 @@ if(ENABLE_METAL)
HW/Latte/Renderer/Metal/CachedFBOMtl.h
HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp
HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h
HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp
HW/Latte/Renderer/Metal/MetalBufferAllocator.h
HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
HW/Latte/Renderer/Metal/MetalMemoryManager.h


@ -0,0 +1,217 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
MetalBufferChunkedHeap::~MetalBufferChunkedHeap()
{
for (auto& chunk : m_chunkBuffers)
chunk->release();
}
uint32 MetalBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize)
{
size_t allocationSize = std::max<size_t>(m_minimumBufferAllocationSize, minimumAllocationSize);
MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options);
cemu_assert_debug(buffer);
cemu_assert_debug(m_chunkBuffers.size() == chunkIndex);
m_chunkBuffers.emplace_back(buffer);
return allocationSize;
}
void MetalSynchronizedRingAllocator::addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset)
{
auto commandBuffer = m_mtlr->GetCurrentCommandBuffer();
if (commandBuffer == buffer.lastSyncpointCommandBuffer)
return;
buffer.lastSyncpointCommandBuffer = commandBuffer;
buffer.queue_syncPoints.emplace(commandBuffer, offset);
}
void MetalSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc)
{
// calculate buffer size, should be a multiple of bufferAllocSize that is at least as large as sizeRequiredForAlloc
uint32 bufferAllocSize = m_minimumBufferAllocSize;
while (bufferAllocSize < sizeRequiredForAlloc)
bufferAllocSize += m_minimumBufferAllocSize;
AllocatorBuffer_t newBuffer{};
newBuffer.writeIndex = 0;
newBuffer.basePtr = nullptr;
newBuffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(bufferAllocSize, m_options);
newBuffer.basePtr = (uint8*)newBuffer.mtlBuffer->contents();
newBuffer.size = bufferAllocSize;
newBuffer.index = (uint32)m_buffers.size();
m_buffers.push_back(newBuffer);
}
MetalSynchronizedRingAllocator::AllocatorReservation_t MetalSynchronizedRingAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
{
if (alignment < 128)
alignment = 128;
size = (size + 127) & ~127;
for (auto& itr : m_buffers)
{
// align pointer
uint32 alignmentPadding = (alignment - (itr.writeIndex % alignment)) % alignment;
uint32 distanceToSyncPoint;
if (!itr.queue_syncPoints.empty())
{
if (itr.queue_syncPoints.front().offset < itr.writeIndex)
distanceToSyncPoint = 0xFFFFFFFF;
else
distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex;
}
else
distanceToSyncPoint = 0xFFFFFFFF;
uint32 spaceNeeded = alignmentPadding + size;
if (spaceNeeded > distanceToSyncPoint)
continue; // not enough space in current buffer
if ((itr.writeIndex + spaceNeeded) > itr.size)
{
// wrap-around
spaceNeeded = size;
alignmentPadding = 0;
// check if there is enough space in current buffer after wrap-around
if (!itr.queue_syncPoints.empty())
{
distanceToSyncPoint = itr.queue_syncPoints.front().offset - 0;
if (spaceNeeded > distanceToSyncPoint)
continue;
}
else if (spaceNeeded > itr.size)
continue;
itr.writeIndex = 0;
}
addUploadBufferSyncPoint(itr, itr.writeIndex);
itr.writeIndex += alignmentPadding;
uint32 offset = itr.writeIndex;
itr.writeIndex += size;
itr.cleanupCounter = 0;
MetalSynchronizedRingAllocator::AllocatorReservation_t res;
res.mtlBuffer = itr.mtlBuffer;
res.memPtr = itr.basePtr + offset;
res.bufferOffset = offset;
res.size = size;
res.bufferIndex = itr.index;
return res;
}
// allocate new buffer
allocateAdditionalUploadBuffer(size);
return AllocateBufferMemory(size, alignment);
}
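// Worked example with illustrative numbers (not from the commit): take a 4 MiB buffer
// with writeIndex at 3.9 MiB and the oldest pending sync point at offset 1 MiB. A 256 KiB
// request no longer fits before the end of the buffer, so the allocator wraps: the
// alignment padding is dropped, writeIndex is reset to 0, and the request is accepted only
// because 256 KiB fits below the 1 MiB sync point. If no existing buffer can satisfy a
// request, allocateAdditionalUploadBuffer() creates a new (possibly larger) buffer and the
// allocation is retried recursively.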
void MetalSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation)
{
if (RequiresFlush())
{
uploadReservation.mtlBuffer->didModifyRange(NS::Range(uploadReservation.bufferOffset, uploadReservation.size));
}
}
void MetalSynchronizedRingAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
for (auto& itr : m_buffers)
{
while (!itr.queue_syncPoints.empty() && latestFinishedCommandBuffer == itr.queue_syncPoints.front().commandBuffer)
{
itr.queue_syncPoints.pop();
}
if (itr.queue_syncPoints.empty())
itr.cleanupCounter++;
}
// check if last buffer is available for deletion
if (m_buffers.size() >= 2)
{
auto& lastBuffer = m_buffers.back();
if (lastBuffer.cleanupCounter >= 1000)
{
// release buffer
lastBuffer.mtlBuffer->release();
m_buffers.pop_back();
}
}
}
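// Note on the shrink heuristic above: only the most recently allocated buffer is ever
// released, and only after it has had no pending sync points for 1000 consecutive
// CleanupBuffer() calls (cleanupCounter is reset whenever the buffer is allocated from
// again), so the pool shrinks slowly and never drops below one buffer.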
MTL::Buffer* MetalSynchronizedRingAllocator::GetBufferByIndex(uint32 index) const
{
return m_buffers[index].mtlBuffer;
}
void MetalSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
numBuffers = (uint32)m_buffers.size();
totalBufferSize = 0;
freeBufferSize = 0;
for (auto& itr : m_buffers)
{
totalBufferSize += itr.size;
// calculate free space in buffer
uint32 distanceToSyncPoint;
if (!itr.queue_syncPoints.empty())
{
if (itr.queue_syncPoints.front().offset < itr.writeIndex)
distanceToSyncPoint = (itr.size - itr.writeIndex) + itr.queue_syncPoints.front().offset; // size with wrap-around
else
distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex;
}
else
distanceToSyncPoint = itr.size;
freeBufferSize += distanceToSyncPoint;
}
}
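// The reported free size is an estimate: per buffer it is the distance from the write
// cursor to the oldest pending sync point (counting the wrap-around), or the whole buffer
// when no sync points are pending.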
/* MetalSynchronizedHeapAllocator */
MetalSynchronizedHeapAllocator::AllocatorReservation* MetalSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment)
{
CHAddr addr = m_chunkedHeap.alloc(size, alignment);
m_activeAllocations.emplace_back(addr);
AllocatorReservation* res = m_poolAllocatorReservation.allocObj();
res->bufferIndex = addr.chunkIndex;
res->bufferOffset = addr.offset;
res->size = size;
res->mtlBuffer = m_chunkedHeap.GetBufferByIndex(addr.chunkIndex);
res->memPtr = m_chunkedHeap.GetChunkPtr(addr.chunkIndex) + addr.offset;
return res;
}
void MetalSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploadReservation)
{
// put the allocation on a delayed release queue for the current command buffer
MTL::CommandBuffer* currentCommandBuffer = m_mtlr->GetCurrentCommandBuffer();
auto it = std::find_if(m_activeAllocations.begin(), m_activeAllocations.end(), [&uploadReservation](const TrackedAllocation& allocation) { return allocation.allocation.chunkIndex == uploadReservation->bufferIndex && allocation.allocation.offset == uploadReservation->bufferOffset; });
cemu_assert_debug(it != m_activeAllocations.end());
m_releaseQueue[currentCommandBuffer].emplace_back(it->allocation);
m_activeAllocations.erase(it);
m_poolAllocatorReservation.freeObj(uploadReservation);
}
void MetalSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation)
{
if (m_chunkedHeap.RequiresFlush())
{
uploadReservation->mtlBuffer->didModifyRange(NS::Range(uploadReservation->bufferOffset, uploadReservation->size));
}
}
void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
auto it = m_releaseQueue.find(latestFinishedCommandBuffer);
if (it == m_releaseQueue.end())
return;
// release allocations
for (auto& addr : it->second)
m_chunkedHeap.free(addr);
m_releaseQueue.erase(it);
}
void MetalSynchronizedHeapAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
m_chunkedHeap.GetStats(numBuffers, totalBufferSize, freeBufferSize);
}
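// Illustrative usage sketch, not part of this commit: the intended call pattern for
// MetalSynchronizedRingAllocator as used later in this diff for staging and uniform data.
// UploadBytes is a hypothetical helper, the vertex-buffer binding is only an example,
// and the usual Cemu/metal-cpp headers are assumed.
void UploadBytes(MetalSynchronizedRingAllocator& staging, MTL::RenderCommandEncoder* encoder, const void* srcData, uint32 srcSize, uint32 bindIndex)
{
// Reserve space; offsets are aligned to at least 128 bytes internally
auto reservation = staging.AllocateBufferMemory(srcSize, 1);
// Fill the CPU-visible pointer, then flush (a no-op unless the storage mode is managed)
memcpy(reservation.memPtr, srcData, srcSize);
staging.FlushReservation(reservation);
// Bind the backing MTL::Buffer at the reserved offset
encoder->setVertexBuffer(reservation.mtlBuffer, reservation.bufferOffset, bindIndex);
}
// Once a command buffer is known to have finished executing, the renderer calls
// staging.CleanupBuffer(finishedCommandBuffer) so that ring space behind its recorded
// sync points becomes reusable.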


@ -1,354 +1,163 @@
#pragma once
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Common/precompiled.h"
#include "Metal/MTLResource.hpp"
#include "util/ChunkedHeap/ChunkedHeap.h"
#include "util/helpers/MemoryPool.h"
#include <utility>
// --- removed in this PR: old free-range allocators (MetalBufferAllocator, MetalTemporaryBufferAllocator) ---
struct MetalBufferRange
{
size_t offset;
size_t size;
};
constexpr size_t BASE_ALLOCATION_SIZE = 8 * 1024 * 1024; // 8 MB
constexpr size_t MAX_ALLOCATION_SIZE = 64 * 1024 * 1024; // 64 MB
template<typename BufferT>
class MetalBufferAllocator
{
public:
struct Buffer
{
MTL::Buffer* m_buffer;
std::vector<MetalBufferRange> m_freeRanges;
BufferT m_data;
};
MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer} {
m_isCPUAccessible = (storageMode == MTL::ResourceStorageModeShared) || (storageMode == MTL::ResourceStorageModeManaged);
m_options = storageMode;
if (m_isCPUAccessible)
m_options |= MTL::ResourceCPUCacheModeWriteCombined;
}
~MetalBufferAllocator()
{
for (auto buffer : m_buffers)
{
buffer.m_buffer->release();
}
}
void ResetAllocations()
{
for (uint32 i = 0; i < m_buffers.size(); i++)
FreeBuffer(i);
}
MTL::Buffer* GetBuffer(uint32 bufferIndex)
{
return m_buffers[bufferIndex].m_buffer;
}
MetalBufferAllocation GetBufferAllocation(size_t size)
{
// Align the size
size = Align(size, 128);
// First, try to find a free range
for (uint32 i = 0; i < m_buffers.size(); i++)
{
auto& buffer = m_buffers[i];
for (uint32 j = 0; j < buffer.m_freeRanges.size(); j++)
{
auto& range = buffer.m_freeRanges[j];
if (size <= range.size)
{
MetalBufferAllocation allocation;
allocation.bufferIndex = i;
allocation.offset = range.offset;
allocation.size = size;
allocation.data = (m_isCPUAccessible ? (uint8*)buffer.m_buffer->contents() + range.offset : nullptr);
range.offset += size;
range.size -= size;
if (range.size == 0)
{
buffer.m_freeRanges.erase(buffer.m_freeRanges.begin() + j);
}
return allocation;
}
}
}
// If no free range was found, allocate a new buffer
size_t allocationSize = BASE_ALLOCATION_SIZE * (1u << m_buffers.size());
allocationSize = std::min(allocationSize, MAX_ALLOCATION_SIZE); // Limit the allocation size
allocationSize = std::max(allocationSize, size);
MTL::Buffer* mtlBuffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options);
#ifdef CEMU_DEBUG_ASSERT
mtlBuffer->setLabel(GetLabel("Buffer from buffer allocator", mtlBuffer));
#endif
MetalBufferAllocation allocation;
allocation.bufferIndex = m_buffers.size();
allocation.offset = 0;
allocation.size = size;
allocation.data = (m_isCPUAccessible ? mtlBuffer->contents() : nullptr);
m_buffers.push_back({mtlBuffer});
auto& buffer = m_buffers.back();
// If the buffer is larger than the requested size, add the remaining space to the free buffer ranges
if (size < allocationSize)
{
MetalBufferRange range;
range.offset = size;
range.size = allocationSize - size;
buffer.m_freeRanges.push_back(range);
}
// Debug
m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory += allocationSize;
return allocation;
}
void FreeAllocation(MetalBufferAllocation& allocation)
{
MetalBufferRange range;
range.offset = allocation.offset;
range.size = allocation.size;
allocation.offset = INVALID_OFFSET;
// Find the correct position to insert the free range
auto& buffer = m_buffers[allocation.bufferIndex];
for (uint32 i = 0; i < buffer.m_freeRanges.size(); i++)
{
auto& freeRange = buffer.m_freeRanges[i];
if (freeRange.offset + freeRange.size == range.offset)
{
freeRange.size += range.size;
return;
}
}
buffer.m_freeRanges.push_back(range);
}
protected:
class MetalRenderer* m_mtlr;
// TODO: make these template arguments
bool m_isCPUAccessible;
MTL::ResourceOptions m_options;
std::vector<Buffer> m_buffers;
void FreeBuffer(uint32 bufferIndex)
{
auto& buffer = m_buffers[bufferIndex];
buffer.m_freeRanges.clear();
buffer.m_freeRanges.push_back({0, buffer.m_buffer->length()});
}
};
struct Empty {};
typedef MetalBufferAllocator<Empty> MetalDefaultBufferAllocator;
struct MetalSyncedBuffer
{
uint32 m_commandBufferCount = 0;
MTL::CommandBuffer* m_lastCommandBuffer = nullptr;
uint32 m_lock = 0;
bool IsLocked() const
{
return (m_lock != 0);
}
};
constexpr uint16 BUFFER_RELEASE_FRAME_TRESHOLD = 1024;
class MetalTemporaryBufferAllocator : public MetalBufferAllocator<MetalSyncedBuffer>
{
public:
MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator<MetalSyncedBuffer>(metalRenderer, MTL::ResourceStorageModeShared) {}
void LockBuffer(uint32 bufferIndex)
{
m_buffers[bufferIndex].m_data.m_lock++;
}
void UnlockBuffer(uint32 bufferIndex)
{
auto& buffer = m_buffers[bufferIndex];
buffer.m_data.m_lock--;
// Release the buffer if it wasn't released due to the lock
if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBufferCount == 0)
FreeBuffer(bufferIndex);
}
void EndFrame()
{
// Unlock all buffers
for (uint32_t i = 0; i < m_buffers.size(); i++)
{
auto& buffer = m_buffers[i];
if (buffer.m_data.IsLocked())
{
if (buffer.m_data.m_commandBufferCount == 0)
FreeBuffer(i);
buffer.m_data.m_lock = 0;
}
}
// TODO: do this for other buffer allocators as well?
// Track how many frames have passed since the last access to the back buffer
if (!m_buffers.empty())
{
auto& backBuffer = m_buffers.back();
if (backBuffer.m_data.m_commandBufferCount == 0)
{
// Release the back buffer if it hasn't been accessed for a while
if (m_framesSinceBackBufferAccess >= BUFFER_RELEASE_FRAME_TRESHOLD)
{
// Debug
m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory -= backBuffer.m_buffer->length();
backBuffer.m_buffer->release();
m_buffers.pop_back();
m_framesSinceBackBufferAccess = 0;
}
else
{
m_framesSinceBackBufferAccess++;
}
}
else
{
m_framesSinceBackBufferAccess = 0;
}
}
}
void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer)
{
m_activeCommandBuffer = commandBuffer;
if (commandBuffer)
{
auto result = m_executingCommandBuffers.emplace(std::make_pair(m_activeCommandBuffer, std::vector<uint32>{}));
cemu_assert_debug(result.second);
m_activeCommandBufferIt = result.first;
}
else
{
m_activeCommandBufferIt = m_executingCommandBuffers.end();
}
}
void CommandBufferFinished(MTL::CommandBuffer* commandBuffer)
{
auto it = m_executingCommandBuffers.find(commandBuffer);
for (auto bufferIndex : it->second)
{
auto& buffer = m_buffers[bufferIndex];
buffer.m_data.m_commandBufferCount--;
// TODO: is this neccessary?
if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBufferCount == 0)
FreeBuffer(bufferIndex);
}
m_executingCommandBuffers.erase(it);
}
MTL::Buffer* GetBuffer(uint32 bufferIndex)
{
cemu_assert_debug(m_activeCommandBuffer);
auto& buffer = m_buffers[bufferIndex];
if (buffer.m_data.m_commandBufferCount == 0 || buffer.m_data.m_lastCommandBuffer != m_activeCommandBuffer)
{
m_activeCommandBufferIt->second.push_back(bufferIndex);
buffer.m_data.m_commandBufferCount++;
buffer.m_data.m_lastCommandBuffer = m_activeCommandBuffer;
}
return buffer.m_buffer;
}
MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex)
{
return m_buffers[bufferIndex].m_buffer;
}
/*
MetalBufferAllocation GetBufferAllocation(size_t size)
{
if (!m_activeCommandBuffer)
throw std::runtime_error("No active command buffer when allocating a buffer!");
auto allocation = MetalBufferAllocator<MetalSyncedBuffer>::GetBufferAllocation(size);
auto& buffer = m_buffers[allocation.bufferIndex];
if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer)
buffer.m_commandBuffers.push_back(m_activeCommandBuffer);
return allocation;
}
*/
// For debugging
/*
void LogInfo()
{
debug_printf("BUFFERS:\n");
for (auto& buffer : m_buffers)
{
debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_data.m_commandBuffers.size());
uint32 same = 0;
uint32 completed = 0;
for (uint32 i = 0; i < buffer.m_data.m_commandBuffers.size(); i++)
{
if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[i]))
completed++;
for (uint32 j = 0; j < buffer.m_data.m_commandBuffers.size(); j++)
{
if (i != j && buffer.m_data.m_commandBuffers[i] == buffer.m_data.m_commandBuffers[j])
same++;
}
}
debug_printf(" same: %u\n", same);
debug_printf(" completed: %u\n", completed);
debug_printf(" FREE RANGES:\n");
for (auto& range : buffer.m_freeRanges)
{
debug_printf(" offset: %zu, size: %zu\n", range.offset, range.size);
}
}
}
*/
private:
MTL::CommandBuffer* m_activeCommandBuffer = nullptr;
std::map<MTL::CommandBuffer*, std::vector<uint32>> m_executingCommandBuffers;
std::map<MTL::CommandBuffer*, std::vector<uint32>>::iterator m_activeCommandBufferIt;
uint16 m_framesSinceBackBufferAccess = 0;
};
// --- added in this PR: chunked-heap based allocators (MetalBufferChunkedHeap, MetalSynchronizedRingAllocator, MetalSynchronizedHeapAllocator) ---
inline MTL::ResourceOptions GetResourceOptions(MTL::ResourceOptions options)
{
if (options & MTL::ResourceStorageModeShared || options & MTL::ResourceStorageModeManaged)
options |= MTL::ResourceCPUCacheModeWriteCombined;
return options;
}
void LatteIndices_invalidateAll();
class MetalBufferChunkedHeap : private ChunkedHeap<>
{
public:
MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { };
~MetalBufferChunkedHeap();
using ChunkedHeap::alloc;
using ChunkedHeap::free;
uint8* GetChunkPtr(uint32 index) const
{
if (index >= m_chunkBuffers.size())
return nullptr;
return (uint8*)m_chunkBuffers[index]->contents();
}
MTL::Buffer* GetBufferByIndex(uint32 index) const
{
cemu_assert_debug(index < m_chunkBuffers.size());
return m_chunkBuffers[index];
}
bool RequiresFlush() const
{
return m_options & MTL::ResourceStorageModeManaged;
}
void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const
{
numBuffers = m_chunkBuffers.size();
totalBufferSize = m_numHeapBytes;
freeBufferSize = m_numHeapBytes - m_numAllocatedBytes;
}
private:
uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override;
const class MetalRenderer* m_mtlr;
MTL::ResourceOptions m_options;
size_t m_minimumBufferAllocationSize;
std::vector<MTL::Buffer*> m_chunkBuffers;
};
// a circular ring-buffer which tracks and releases memory per command-buffer
class MetalSynchronizedRingAllocator
{
public:
MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
MetalSynchronizedRingAllocator(const MetalSynchronizedRingAllocator&) = delete; // disallow copy
struct BufferSyncPoint_t
{
// todo - modularize sync point
MTL::CommandBuffer* commandBuffer;
uint32 offset;
BufferSyncPoint_t(MTL::CommandBuffer* _commandBuffer, uint32 _offset) : commandBuffer(_commandBuffer), offset(_offset) {};
};
struct AllocatorBuffer_t
{
MTL::Buffer* mtlBuffer;
uint8* basePtr;
uint32 size;
uint32 writeIndex;
std::queue<BufferSyncPoint_t> queue_syncPoints;
MTL::CommandBuffer* lastSyncpointCommandBuffer{ nullptr };
uint32 index;
uint32 cleanupCounter{ 0 }; // increased by one every time CleanupBuffer() is called if there is no sync point. If it reaches 1000 then the buffer is released
};
struct AllocatorReservation_t
{
MTL::Buffer* mtlBuffer;
uint8* memPtr;
uint32 bufferOffset;
uint32 size;
uint32 bufferIndex;
};
AllocatorReservation_t AllocateBufferMemory(uint32 size, uint32 alignment);
void FlushReservation(AllocatorReservation_t& uploadReservation);
void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
MTL::Buffer* GetBufferByIndex(uint32 index) const;
bool RequiresFlush() const
{
return m_options & MTL::ResourceStorageModeManaged;
}
void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
private:
void allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc);
void addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset);
const class MetalRenderer* m_mtlr;
MTL::ResourceOptions m_options;
const uint32 m_minimumBufferAllocSize;
std::vector<AllocatorBuffer_t> m_buffers;
};
// heap style allocator with released memory being freed after the current command buffer finishes
class MetalSynchronizedHeapAllocator
{
struct TrackedAllocation
{
TrackedAllocation(CHAddr allocation) : allocation(allocation) {};
CHAddr allocation;
};
public:
MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, options, minimumBufferAllocSize) {}
MetalSynchronizedHeapAllocator(const MetalSynchronizedHeapAllocator&) = delete; // disallow copy
struct AllocatorReservation
{
MTL::Buffer* mtlBuffer;
uint8* memPtr;
uint32 bufferOffset;
uint32 size;
uint32 bufferIndex;
};
AllocatorReservation* AllocateBufferMemory(uint32 size, uint32 alignment);
void FreeReservation(AllocatorReservation* uploadReservation);
void FlushReservation(AllocatorReservation* uploadReservation);
void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer);
void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const;
private:
const class MetalRenderer* m_mtlr;
MetalBufferChunkedHeap m_chunkedHeap;
// allocations
std::vector<TrackedAllocation> m_activeAllocations;
MemoryPool<AllocatorReservation> m_poolAllocatorReservation{32};
// release queue
std::unordered_map<MTL::CommandBuffer*, std::vector<CHAddr>> m_releaseQueue;
};
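// Illustrative sketch, not part of this commit: intended lifetime of an index-data
// reservation from MetalSynchronizedHeapAllocator. DrawWithDecodedIndices is a
// hypothetical helper; the real flow goes through LatteIndices_decode and the
// indexData_* callbacks changed later in this diff.
void DrawWithDecodedIndices(MetalSynchronizedHeapAllocator& indexAllocator, MTL::RenderCommandEncoder* encoder, MTL::PrimitiveType primitive, const uint16* indices, uint32 indexCount)
{
uint32 byteSize = indexCount * sizeof(uint16);
auto* reservation = indexAllocator.AllocateBufferMemory(byteSize, 128);
memcpy(reservation->memPtr, indices, byteSize);
indexAllocator.FlushReservation(reservation); // no-op unless the heap uses managed storage
encoder->drawIndexedPrimitives(primitive, indexCount, MTL::IndexTypeUInt16, reservation->mtlBuffer, reservation->bufferOffset);
// The GPU may still read this memory; FreeReservation() only queues it on the release
// queue keyed by the current command buffer. It is returned to the chunked heap when
// CleanupBuffer(finishedCommandBuffer) later runs for that command buffer.
indexAllocator.FreeReservation(reservation);
}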


@ -73,20 +73,15 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si
if (m_bufferCacheMode == BufferCacheMode::DevicePrivate)
{
// --- removed in this PR (old temporary-buffer upload path) ---
auto allocation = m_tempBufferAllocator.GetBufferAllocation(size);
auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex);
memcpy((uint8*)buffer->contents() + allocation.offset, data, size);
// Lock the buffer to make sure it's not deallocated before the copy is done
m_tempBufferAllocator.LockBuffer(allocation.bufferIndex);
m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
// Make sure the buffer has the right command buffer
m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this
// We can now safely unlock the buffer
m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex);
// --- added in this PR (staging-allocator upload path) ---
auto blitCommandEncoder = m_mtlr->GetBlitCommandEncoder();
auto allocation = m_stagingAllocator.AllocateBufferMemory(size, 1);
memcpy(allocation.memPtr, data, size);
m_stagingAllocator.FlushReservation(allocation);
blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size);
//m_mtlr->CopyBufferToBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES);
}
else
{


@ -7,22 +7,17 @@
class MetalMemoryManager
{
public:
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer) {}
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 32u * 1024 * 1024), m_indexAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 4u * 1024 * 1024) {}
~MetalMemoryManager();
// --- removed in this PR ---
MetalDefaultBufferAllocator& GetBufferAllocator()
{
return m_bufferAllocator;
}
MetalDefaultBufferAllocator& GetFramePersistentBufferAllocator()
{
return m_framePersistentBufferAllocator;
}
MetalTemporaryBufferAllocator& GetTemporaryBufferAllocator()
{
return m_tempBufferAllocator;
}
// --- added in this PR ---
MetalSynchronizedRingAllocator& GetStagingAllocator()
{
return m_stagingAllocator;
}
MetalSynchronizedHeapAllocator& GetIndexAllocator()
{
return m_indexAllocator;
}
MTL::Buffer* GetBufferCache()
@ -30,6 +25,12 @@ public:
return m_bufferCache;
}
void CleanupBuffers(MTL::CommandBuffer* latestFinishedCommandBuffer)
{
m_stagingAllocator.CleanupBuffer(latestFinishedCommandBuffer);
m_indexAllocator.CleanupBuffer(latestFinishedCommandBuffer);
}
// Texture upload buffer
void* AcquireTextureUploadBuffer(size_t size);
void ReleaseTextureUploadBuffer(uint8* mem);
@ -65,9 +66,8 @@ private:
std::vector<uint8> m_textureUploadBuffer;
MetalDefaultBufferAllocator m_bufferAllocator;
MetalDefaultBufferAllocator m_framePersistentBufferAllocator;
MetalTemporaryBufferAllocator m_tempBufferAllocator;
MetalSynchronizedRingAllocator m_stagingAllocator;
MetalSynchronizedHeapAllocator m_indexAllocator;
MTL::Buffer* m_bufferCache = nullptr;
BufferCacheMode m_bufferCacheMode;


@ -3,8 +3,6 @@
class MetalPerformanceMonitor
{
public:
size_t m_bufferAllocatorMemory = 0;
// Per frame data
uint32 m_commandBuffers = 0;
uint32 m_renderPasses = 0;


@ -16,13 +16,12 @@
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
#include "Cemu/Logging/CemuLogging.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Common/precompiled.h"
#include "HW/Latte/Renderer/Metal/MetalBufferAllocator.h"
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Metal/MTLCaptureManager.hpp"
#include "config/CemuConfig.h"
#include "gui/guiWrapper.h"
@ -191,6 +190,7 @@ MetalRenderer::MetalRenderer()
utilityLibrary->release();
// HACK: for some reason, this variable ends up being initialized to some garbage data, even though its declared as bool m_captureFrame = false;
m_occlusionQuery.m_lastCommandBuffer = nullptr;
m_captureFrame = false;
}
@ -302,12 +302,6 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC)
// Reset the command buffers (they are released by TemporaryBufferAllocator)
CommitCommandBuffer();
// Release frame persistent buffers
m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations();
// Unlock all temporary buffers
m_memoryManager->GetTemporaryBufferAllocator().EndFrame();
// Debug
m_performanceMonitor.ResetPerFrameData();
@ -593,7 +587,6 @@ void MetalRenderer::AppendOverlayDebugInfo()
ImGui::Text("--- Metal info ---");
ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize());
ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024);
ImGui::Text("--- Metal info (per frame) ---");
ImGui::Text("Command buffers %u", m_performanceMonitor.m_commandBuffers);
@ -601,6 +594,31 @@ void MetalRenderer::AppendOverlayDebugInfo()
ImGui::Text("Clears %u", m_performanceMonitor.m_clears);
ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws);
ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans);
ImGui::Text("--- Cache debug info ---");
uint32 bufferCacheHeapSize = 0;
uint32 bufferCacheAllocationSize = 0;
uint32 bufferCacheNumAllocations = 0;
LatteBufferCache_getStats(bufferCacheHeapSize, bufferCacheAllocationSize, bufferCacheNumAllocations);
ImGui::Text("Buffer");
ImGui::SameLine(60.0f);
ImGui::Text("%06uKB / %06uKB Allocs: %u", (uint32)(bufferCacheAllocationSize + 1023) / 1024, ((uint32)bufferCacheHeapSize + 1023) / 1024, (uint32)bufferCacheNumAllocations);
uint32 numBuffers;
size_t totalSize, freeSize;
m_memoryManager->GetStagingAllocator().GetStats(numBuffers, totalSize, freeSize);
ImGui::Text("Staging");
ImGui::SameLine(60.0f);
ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers);
m_memoryManager->GetIndexAllocator().GetStats(numBuffers, totalSize, freeSize);
ImGui::Text("Index");
ImGui::SameLine(60.0f);
ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers);
}
void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ)
@ -682,17 +700,17 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s
auto blitCommandEncoder = GetBlitCommandEncoder();
// Allocate a temporary buffer
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize);
auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex);
auto& bufferAllocator = m_memoryManager->GetStagingAllocator();
auto allocation = bufferAllocator.AllocateBufferMemory(compressedImageSize, 1);
bufferAllocator.FlushReservation(allocation);
// Copy the data to the temporary buffer
memcpy(allocation.data, pixelData, compressedImageSize);
memcpy(allocation.memPtr, pixelData, compressedImageSize);
//buffer->didModifyRange(NS::Range(allocation.offset, allocation.size));
// TODO: specify blit options when copying to a depth stencil texture?
// Copy the data from the temporary buffer to the texture
blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ));
blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ));
//}
}
@ -1067,9 +1085,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
uint32 hostIndexCount;
uint32 indexMin = 0;
uint32 indexMax = 0;
uint32 indexBufferOffset = 0;
uint32 indexBufferIndex = 0;
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
Renderer::IndexAllocation indexAllocation;
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexAllocation);
auto indexAllocationMtl = static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(indexAllocation.rendererInternal);
// Buffer cache
if (m_memoryManager->UseHostMemoryForCache())
@ -1308,20 +1326,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
BindStageResources(renderCommandEncoder, pixelShader, usesGeometryShader);
// Draw
MTL::Buffer* indexBuffer = nullptr;
if (hostIndexType != INDEX_TYPE::NONE)
{
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex);
// We have already retrieved the buffer, no need for it to be locked anymore
bufferAllocator.UnlockBuffer(indexBufferIndex);
}
if (usesGeometryShader)
{
if (indexBuffer)
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding);
if (hostIndexType != INDEX_TYPE::NONE)
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexAllocationMtl->mtlBuffer, indexAllocationMtl->bufferOffset, vertexShader->resourceMapping.indexBufferBinding);
uint8 hostIndexTypeU8 = (uint8)hostIndexType;
renderCommandEncoder->setObjectBytes(&hostIndexTypeU8, sizeof(hostIndexTypeU8), vertexShader->resourceMapping.indexTypeBinding);
@ -1349,10 +1357,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
}
else
{
if (indexBuffer)
if (hostIndexType != INDEX_TYPE::NONE)
{
auto mtlIndexType = GetMtlIndexType(hostIndexType);
renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance);
renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexAllocationMtl->mtlBuffer, indexAllocationMtl->bufferOffset, instanceCount, baseVertex, baseInstance);
}
else
{
@ -1492,29 +1500,21 @@ void MetalRenderer::draw_handleSpecialState5()
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3));
}
// --- removed in this PR (old index memory callbacks) ---
void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex)
{
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
auto allocation = bufferAllocator.GetBufferAllocation(size);
offset = allocation.offset;
bufferIndex = allocation.bufferIndex;
// Lock the buffer so that it doesn't get released
bufferAllocator.LockBuffer(allocation.bufferIndex);
return allocation.data;
}
void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size)
{
// Do nothing
/*
if (!HasUnifiedMemory())
{
auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex);
buffer->didModifyRange(NS::Range(offset, size));
}
*/
}
// --- added in this PR (new index memory callbacks) ---
Renderer::IndexAllocation MetalRenderer::indexData_reserveIndexMemory(uint32 size)
{
auto allocation = m_memoryManager->GetIndexAllocator().AllocateBufferMemory(size, 128);
return {allocation->memPtr, allocation};
}
void MetalRenderer::indexData_releaseIndexMemory(IndexAllocation& allocation)
{
m_memoryManager->GetIndexAllocator().FreeReservation(static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(allocation.rendererInternal));
}
void MetalRenderer::indexData_uploadIndexMemory(IndexAllocation& allocation)
{
m_memoryManager->GetIndexAllocator().FlushReservation(static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(allocation.rendererInternal));
}
LatteQueryObject* MetalRenderer::occlusionQuery_create() {
@ -1652,9 +1652,6 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer()
m_recordedDrawcalls = 0;
m_commitTreshold = m_defaultCommitTreshlod;
// Notify memory manager about the new command buffer
m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer);
// Debug
m_performanceMonitor.m_commandBuffers++;
@ -1835,8 +1832,6 @@ void MetalRenderer::CommitCommandBuffer()
m_executingCommandBuffers.push_back(mtlCommandBuffer);
m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr);
// Debug
//m_commandQueue->insertDebugCaptureBoundary();
}
@ -1845,26 +1840,20 @@ void MetalRenderer::CommitCommandBuffer()
void MetalRenderer::ProcessFinishedCommandBuffers()
{
// Check for finished command buffers
bool atLeastOneCompleted = false;
for (auto it = m_executingCommandBuffers.begin(); it != m_executingCommandBuffers.end();)
{
auto commandBuffer = *it;
if (CommandBufferCompleted(commandBuffer))
{
m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer);
m_memoryManager->CleanupBuffers(commandBuffer);
commandBuffer->release();
it = m_executingCommandBuffers.erase(it);
atLeastOneCompleted = true;
}
else
{
++it;
}
}
// Invalidate indices if at least one command buffer has completed
if (atLeastOneCompleted)
LatteIndices_invalidateAll();
}
bool MetalRenderer::AcquireDrawable(bool mainWindow)
@ -2102,15 +2091,13 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
}
}
auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator();
size_t size = shader->uniform.uniformRangeSize;
auto supportBuffer = bufferAllocator.GetBufferAllocation(size);
memcpy(supportBuffer.data, supportBufferData, size);
auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex);
//if (!HasUnifiedMemory())
// buffer->didModifyRange(NS::Range(supportBuffer.offset, size));
auto& bufferAllocator = m_memoryManager->GetStagingAllocator();
auto allocation = bufferAllocator.AllocateBufferMemory(size, 1);
memcpy(allocation.memPtr, supportBufferData, size);
bufferAllocator.FlushReservation(allocation);
SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint);
SetBuffer(renderCommandEncoder, mtlShaderType, allocation.mtlBuffer, allocation.bufferOffset, shader->resourceMapping.uniformVarsBufferBindingPoint);
}
// Uniform buffers


@ -7,19 +7,6 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h"
struct MetalBufferAllocation
{
void* data;
uint32 bufferIndex;
size_t offset = INVALID_OFFSET;
size_t size;
bool IsValid() const
{
return offset != INVALID_OFFSET;
}
};
enum MetalGeneralShaderType
{
METAL_GENERAL_SHADER_TYPE_VERTEX,
@ -271,8 +258,9 @@ public:
void draw_handleSpecialState5();
// index
void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override;
void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override;
IndexAllocation indexData_reserveIndexMemory(uint32 size) override;
void indexData_releaseIndexMemory(IndexAllocation& allocation) override;
void indexData_uploadIndexMemory(IndexAllocation& allocation) override;
// occlusion queries
LatteQueryObject* occlusionQuery_create() override;
@ -294,14 +282,14 @@ public:
return (m_currentCommandBuffer.m_commandBuffer && !m_currentCommandBuffer.m_commited);
}
MTL::CommandBuffer* GetCurrentCommandBuffer()
MTL::CommandBuffer* GetCurrentCommandBuffer() const
{
cemu_assert_debug(m_currentCommandBuffer.m_commandBuffer);
return m_currentCommandBuffer.m_commandBuffer;
}
MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted()
MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted() const
{
// The command buffer has been commited and has finished execution
if (m_currentCommandBuffer.m_commited && m_executingCommandBuffers.size() == 0)


@ -15,8 +15,8 @@
#define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512)
static bool s_isLoadingShadersMtl{false};
static bool s_hasRAMFilesystem{false};
class FileCache* s_airCache{nullptr};
//static bool s_hasRAMFilesystem{false};
//class FileCache* s_airCache{nullptr};
extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async;
@ -190,6 +190,7 @@ void RendererShaderMtl::ShaderCacheLoading_end()
void RendererShaderMtl::ShaderCacheLoading_Close()
{
// Close the AIR cache
/*
if (s_airCache)
{
delete s_airCache;
@ -197,7 +198,6 @@ void RendererShaderMtl::ShaderCacheLoading_Close()
}
// Close RAM filesystem
/*
if (s_hasRAMFilesystem)
executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH);
*/