diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index ced42766..881a6d6d 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -555,6 +555,7 @@ if(ENABLE_METAL) HW/Latte/Renderer/Metal/CachedFBOMtl.h HW/Latte/Renderer/Metal/MetalAttachmentsInfo.cpp HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h + HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp HW/Latte/Renderer/Metal/MetalBufferAllocator.h HW/Latte/Renderer/Metal/MetalMemoryManager.cpp HW/Latte/Renderer/Metal/MetalMemoryManager.h diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp new file mode 100644 index 00000000..05d169b3 --- /dev/null +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.cpp @@ -0,0 +1,217 @@ +#include "Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h" + +MetalBufferChunkedHeap::~MetalBufferChunkedHeap() +{ + for (auto& chunk : m_chunkBuffers) + chunk->release(); +} + +uint32 MetalBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) +{ + size_t allocationSize = std::max(m_minimumBufferAllocationSize, minimumAllocationSize); + MTL::Buffer* buffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options); + cemu_assert_debug(buffer); + cemu_assert_debug(m_chunkBuffers.size() == chunkIndex); + m_chunkBuffers.emplace_back(buffer); + + return allocationSize; +} + +void MetalSynchronizedRingAllocator::addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset) +{ + auto commandBuffer = m_mtlr->GetCurrentCommandBuffer(); + if (commandBuffer == buffer.lastSyncpointCommandBuffer) + return; + buffer.lastSyncpointCommandBuffer = commandBuffer; + buffer.queue_syncPoints.emplace(commandBuffer, offset); +} + +void MetalSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc) +{ + // calculate buffer size, should be a multiple of bufferAllocSize that is at least as large as sizeRequiredForAlloc + uint32 bufferAllocSize = 
m_minimumBufferAllocSize; + while (bufferAllocSize < sizeRequiredForAlloc) + bufferAllocSize += m_minimumBufferAllocSize; + + AllocatorBuffer_t newBuffer{}; + newBuffer.writeIndex = 0; + newBuffer.basePtr = nullptr; + newBuffer.mtlBuffer = m_mtlr->GetDevice()->newBuffer(bufferAllocSize, m_options); + newBuffer.basePtr = (uint8*)newBuffer.mtlBuffer->contents(); + newBuffer.size = bufferAllocSize; + newBuffer.index = (uint32)m_buffers.size(); + m_buffers.push_back(newBuffer); +} + +MetalSynchronizedRingAllocator::AllocatorReservation_t MetalSynchronizedRingAllocator::AllocateBufferMemory(uint32 size, uint32 alignment) +{ + if (alignment < 128) + alignment = 128; + size = (size + 127) & ~127; + + for (auto& itr : m_buffers) + { + // align pointer + uint32 alignmentPadding = (alignment - (itr.writeIndex % alignment)) % alignment; + uint32 distanceToSyncPoint; + if (!itr.queue_syncPoints.empty()) + { + if (itr.queue_syncPoints.front().offset < itr.writeIndex) + distanceToSyncPoint = 0xFFFFFFFF; + else + distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex; + } + else + distanceToSyncPoint = 0xFFFFFFFF; + uint32 spaceNeeded = alignmentPadding + size; + if (spaceNeeded > distanceToSyncPoint) + continue; // not enough space in current buffer + if ((itr.writeIndex + spaceNeeded) > itr.size) + { + // wrap-around + spaceNeeded = size; + alignmentPadding = 0; + // check if there is enough space in current buffer after wrap-around + if (!itr.queue_syncPoints.empty()) + { + distanceToSyncPoint = itr.queue_syncPoints.front().offset - 0; + if (spaceNeeded > distanceToSyncPoint) + continue; + } + else if (spaceNeeded > itr.size) + continue; + itr.writeIndex = 0; + } + addUploadBufferSyncPoint(itr, itr.writeIndex); + itr.writeIndex += alignmentPadding; + uint32 offset = itr.writeIndex; + itr.writeIndex += size; + itr.cleanupCounter = 0; + MetalSynchronizedRingAllocator::AllocatorReservation_t res; + res.mtlBuffer = itr.mtlBuffer; + res.memPtr = itr.basePtr + 
offset; + res.bufferOffset = offset; + res.size = size; + res.bufferIndex = itr.index; + + return res; + } + + // allocate new buffer + allocateAdditionalUploadBuffer(size); + + return AllocateBufferMemory(size, alignment); +} + +void MetalSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation) +{ + if (RequiresFlush()) + { + uploadReservation.mtlBuffer->didModifyRange(NS::Range(uploadReservation.bufferOffset, uploadReservation.size)); + } +} + +void MetalSynchronizedRingAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer) +{ + for (auto& itr : m_buffers) + { + while (!itr.queue_syncPoints.empty() && latestFinishedCommandBuffer == itr.queue_syncPoints.front().commandBuffer) + { + itr.queue_syncPoints.pop(); + } + if (itr.queue_syncPoints.empty()) + itr.cleanupCounter++; + } + + // check if last buffer is available for deletion + if (m_buffers.size() >= 2) + { + auto& lastBuffer = m_buffers.back(); + if (lastBuffer.cleanupCounter >= 1000) + { + // release buffer + lastBuffer.mtlBuffer->release(); + m_buffers.pop_back(); + } + } +} + +MTL::Buffer* MetalSynchronizedRingAllocator::GetBufferByIndex(uint32 index) const +{ + return m_buffers[index].mtlBuffer; +} + +void MetalSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const +{ + numBuffers = (uint32)m_buffers.size(); + totalBufferSize = 0; + freeBufferSize = 0; + for (auto& itr : m_buffers) + { + totalBufferSize += itr.size; + // calculate free space in buffer + uint32 distanceToSyncPoint; + if (!itr.queue_syncPoints.empty()) + { + if (itr.queue_syncPoints.front().offset < itr.writeIndex) + distanceToSyncPoint = (itr.size - itr.writeIndex) + itr.queue_syncPoints.front().offset; // size with wrap-around + else + distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex; + } + else + distanceToSyncPoint = itr.size; + freeBufferSize += distanceToSyncPoint; + } +} + +/* 
MetalSynchronizedHeapAllocator */ + +MetalSynchronizedHeapAllocator::AllocatorReservation* MetalSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment) +{ + CHAddr addr = m_chunkedHeap.alloc(size, alignment); + m_activeAllocations.emplace_back(addr); + AllocatorReservation* res = m_poolAllocatorReservation.allocObj(); + res->bufferIndex = addr.chunkIndex; + res->bufferOffset = addr.offset; + res->size = size; + res->mtlBuffer = m_chunkedHeap.GetBufferByIndex(addr.chunkIndex); + res->memPtr = m_chunkedHeap.GetChunkPtr(addr.chunkIndex) + addr.offset; + + return res; +} + +void MetalSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploadReservation) +{ + // put the allocation on a delayed release queue for the current command buffer + MTL::CommandBuffer* currentCommandBuffer = m_mtlr->GetCurrentCommandBuffer(); + auto it = std::find_if(m_activeAllocations.begin(), m_activeAllocations.end(), [&uploadReservation](const TrackedAllocation& allocation) { return allocation.allocation.chunkIndex == uploadReservation->bufferIndex && allocation.allocation.offset == uploadReservation->bufferOffset; }); + cemu_assert_debug(it != m_activeAllocations.end()); + m_releaseQueue[currentCommandBuffer].emplace_back(it->allocation); + m_activeAllocations.erase(it); + m_poolAllocatorReservation.freeObj(uploadReservation); +} + +void MetalSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation) +{ + if (m_chunkedHeap.RequiresFlush()) + { + uploadReservation->mtlBuffer->didModifyRange(NS::Range(uploadReservation->bufferOffset, uploadReservation->size)); + } +} + +void MetalSynchronizedHeapAllocator::CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer) +{ + auto it = m_releaseQueue.find(latestFinishedCommandBuffer); + if (it == m_releaseQueue.end()) + return; + + // release allocations + for (auto& addr : it->second) + m_chunkedHeap.free(addr); + m_releaseQueue.erase(it); +} + +void 
MetalSynchronizedHeapAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const +{ + m_chunkedHeap.GetStats(numBuffers, totalBufferSize, freeBufferSize); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h index 209b1395..2a62de19 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalBufferAllocator.h @@ -1,354 +1,163 @@ #pragma once #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" -#include "Common/precompiled.h" #include "Metal/MTLResource.hpp" +#include "util/ChunkedHeap/ChunkedHeap.h" +#include "util/helpers/MemoryPool.h" + #include -struct MetalBufferRange +inline MTL::ResourceOptions GetResourceOptions(MTL::ResourceOptions options) { - size_t offset; - size_t size; + if (options & MTL::ResourceStorageModeShared || options & MTL::ResourceStorageModeManaged) + options |= MTL::ResourceCPUCacheModeWriteCombined; + + return options; +} + +class MetalBufferChunkedHeap : private ChunkedHeap<> +{ + public: + MetalBufferChunkedHeap(const class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocationSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { }; + ~MetalBufferChunkedHeap(); + + using ChunkedHeap::alloc; + using ChunkedHeap::free; + + uint8* GetChunkPtr(uint32 index) const + { + if (index >= m_chunkBuffers.size()) + return nullptr; + + return (uint8*)m_chunkBuffers[index]->contents(); + } + + MTL::Buffer* GetBufferByIndex(uint32 index) const + { + cemu_assert_debug(index < m_chunkBuffers.size()); + + return m_chunkBuffers[index]; + } + + bool RequiresFlush() const + { + return m_options & MTL::ResourceStorageModeManaged; + } + + void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const + { + numBuffers = m_chunkBuffers.size(); + totalBufferSize = 
m_numHeapBytes; + freeBufferSize = m_numHeapBytes - m_numAllocatedBytes; + } + + private: + uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override; + + const class MetalRenderer* m_mtlr; + + MTL::ResourceOptions m_options; + size_t m_minimumBufferAllocationSize; + + std::vector m_chunkBuffers; }; -constexpr size_t BASE_ALLOCATION_SIZE = 8 * 1024 * 1024; // 8 MB -constexpr size_t MAX_ALLOCATION_SIZE = 64 * 1024 * 1024; // 64 MB - -void LatteIndices_invalidateAll(); - -template -class MetalBufferAllocator +// a circular ring-buffer which tracks and releases memory per command-buffer +class MetalSynchronizedRingAllocator { public: - struct Buffer + MetalSynchronizedRingAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, uint32 minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_options(GetResourceOptions(options)), m_minimumBufferAllocSize(minimumBufferAllocSize) {}; + MetalSynchronizedRingAllocator(const MetalSynchronizedRingAllocator&) = delete; // disallow copy + + struct BufferSyncPoint_t + { + // todo - modularize sync point + MTL::CommandBuffer* commandBuffer; + uint32 offset; + + BufferSyncPoint_t(MTL::CommandBuffer* _commandBuffer, uint32 _offset) : commandBuffer(_commandBuffer), offset(_offset) {}; + }; + + struct AllocatorBuffer_t + { + MTL::Buffer* mtlBuffer; + uint8* basePtr; + uint32 size; + uint32 writeIndex; + std::queue queue_syncPoints; + MTL::CommandBuffer* lastSyncpointCommandBuffer{ nullptr }; + uint32 index; + uint32 cleanupCounter{ 0 }; // increased by one every time CleanupBuffer() is called if there is no sync point. 
If it reaches 1000 then the buffer is released + }; + + struct AllocatorReservation_t + { + MTL::Buffer* mtlBuffer; + uint8* memPtr; + uint32 bufferOffset; + uint32 size; + uint32 bufferIndex; + }; + + AllocatorReservation_t AllocateBufferMemory(uint32 size, uint32 alignment); + void FlushReservation(AllocatorReservation_t& uploadReservation); + void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer); + MTL::Buffer* GetBufferByIndex(uint32 index) const; + + bool RequiresFlush() const { - MTL::Buffer* m_buffer; - std::vector m_freeRanges; - BufferT m_data; - }; - - MetalBufferAllocator(class MetalRenderer* metalRenderer, MTL::ResourceOptions storageMode) : m_mtlr{metalRenderer} { - m_isCPUAccessible = (storageMode == MTL::ResourceStorageModeShared) || (storageMode == MTL::ResourceStorageModeManaged); - - m_options = storageMode; - if (m_isCPUAccessible) - m_options |= MTL::ResourceCPUCacheModeWriteCombined; + return m_options & MTL::ResourceStorageModeManaged; } - ~MetalBufferAllocator() - { - for (auto buffer : m_buffers) - { - buffer.m_buffer->release(); - } - } - - void ResetAllocations() - { - for (uint32 i = 0; i < m_buffers.size(); i++) - FreeBuffer(i); - } - - MTL::Buffer* GetBuffer(uint32 bufferIndex) - { - return m_buffers[bufferIndex].m_buffer; - } - - MetalBufferAllocation GetBufferAllocation(size_t size) - { - // Align the size - size = Align(size, 128); - - // First, try to find a free range - for (uint32 i = 0; i < m_buffers.size(); i++) - { - auto& buffer = m_buffers[i]; - for (uint32 j = 0; j < buffer.m_freeRanges.size(); j++) - { - auto& range = buffer.m_freeRanges[j]; - if (size <= range.size) - { - MetalBufferAllocation allocation; - allocation.bufferIndex = i; - allocation.offset = range.offset; - allocation.size = size; - allocation.data = (m_isCPUAccessible ? 
(uint8*)buffer.m_buffer->contents() + range.offset : nullptr); - - range.offset += size; - range.size -= size; - - if (range.size == 0) - { - buffer.m_freeRanges.erase(buffer.m_freeRanges.begin() + j); - } - - return allocation; - } - } - } - - // If no free range was found, allocate a new buffer - size_t allocationSize = BASE_ALLOCATION_SIZE * (1u << m_buffers.size()); - allocationSize = std::min(allocationSize, MAX_ALLOCATION_SIZE); // Limit the allocation size - allocationSize = std::max(allocationSize, size); - MTL::Buffer* mtlBuffer = m_mtlr->GetDevice()->newBuffer(allocationSize, m_options); - #ifdef CEMU_DEBUG_ASSERT - mtlBuffer->setLabel(GetLabel("Buffer from buffer allocator", mtlBuffer)); - #endif - - MetalBufferAllocation allocation; - allocation.bufferIndex = m_buffers.size(); - allocation.offset = 0; - allocation.size = size; - allocation.data = (m_isCPUAccessible ? mtlBuffer->contents() : nullptr); - - m_buffers.push_back({mtlBuffer}); - auto& buffer = m_buffers.back(); - - // If the buffer is larger than the requested size, add the remaining space to the free buffer ranges - if (size < allocationSize) - { - MetalBufferRange range; - range.offset = size; - range.size = allocationSize - size; - - buffer.m_freeRanges.push_back(range); - } - - // Debug - m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory += allocationSize; - - return allocation; - } - - void FreeAllocation(MetalBufferAllocation& allocation) - { - MetalBufferRange range; - range.offset = allocation.offset; - range.size = allocation.size; - - allocation.offset = INVALID_OFFSET; - - // Find the correct position to insert the free range - auto& buffer = m_buffers[allocation.bufferIndex]; - for (uint32 i = 0; i < buffer.m_freeRanges.size(); i++) - { - auto& freeRange = buffer.m_freeRanges[i]; - if (freeRange.offset + freeRange.size == range.offset) - { - freeRange.size += range.size; - return; - } - } - - buffer.m_freeRanges.push_back(range); - } - -protected: - class MetalRenderer* 
m_mtlr; - - // TODO: make these template arguments - bool m_isCPUAccessible; - MTL::ResourceOptions m_options; - - std::vector m_buffers; - - void FreeBuffer(uint32 bufferIndex) - { - auto& buffer = m_buffers[bufferIndex]; - buffer.m_freeRanges.clear(); - buffer.m_freeRanges.push_back({0, buffer.m_buffer->length()}); - } -}; - -struct Empty {}; -typedef MetalBufferAllocator MetalDefaultBufferAllocator; - -struct MetalSyncedBuffer -{ - uint32 m_commandBufferCount = 0; - MTL::CommandBuffer* m_lastCommandBuffer = nullptr; - uint32 m_lock = 0; - - bool IsLocked() const - { - return (m_lock != 0); - } -}; - -constexpr uint16 BUFFER_RELEASE_FRAME_TRESHOLD = 1024; - -class MetalTemporaryBufferAllocator : public MetalBufferAllocator -{ -public: - MetalTemporaryBufferAllocator(class MetalRenderer* metalRenderer) : MetalBufferAllocator(metalRenderer, MTL::ResourceStorageModeShared) {} - - void LockBuffer(uint32 bufferIndex) - { - m_buffers[bufferIndex].m_data.m_lock++; - } - - void UnlockBuffer(uint32 bufferIndex) - { - auto& buffer = m_buffers[bufferIndex]; - - buffer.m_data.m_lock--; - - // Release the buffer if it wasn't released due to the lock - if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBufferCount == 0) - FreeBuffer(bufferIndex); - } - - void EndFrame() - { - // Unlock all buffers - for (uint32_t i = 0; i < m_buffers.size(); i++) - { - auto& buffer = m_buffers[i]; - - if (buffer.m_data.IsLocked()) - { - if (buffer.m_data.m_commandBufferCount == 0) - FreeBuffer(i); - - buffer.m_data.m_lock = 0; - } - } - - // TODO: do this for other buffer allocators as well? 
- // Track how many frames have passed since the last access to the back buffer - if (!m_buffers.empty()) - { - auto& backBuffer = m_buffers.back(); - if (backBuffer.m_data.m_commandBufferCount == 0) - { - // Release the back buffer if it hasn't been accessed for a while - if (m_framesSinceBackBufferAccess >= BUFFER_RELEASE_FRAME_TRESHOLD) - { - // Debug - m_mtlr->GetPerformanceMonitor().m_bufferAllocatorMemory -= backBuffer.m_buffer->length(); - - backBuffer.m_buffer->release(); - m_buffers.pop_back(); - - m_framesSinceBackBufferAccess = 0; - } - else - { - m_framesSinceBackBufferAccess++; - } - } - else - { - m_framesSinceBackBufferAccess = 0; - } - } - } - - void SetActiveCommandBuffer(MTL::CommandBuffer* commandBuffer) - { - m_activeCommandBuffer = commandBuffer; - if (commandBuffer) - { - auto result = m_executingCommandBuffers.emplace(std::make_pair(m_activeCommandBuffer, std::vector{})); - cemu_assert_debug(result.second); - m_activeCommandBufferIt = result.first; - } - else - { - m_activeCommandBufferIt = m_executingCommandBuffers.end(); - } - } - - void CommandBufferFinished(MTL::CommandBuffer* commandBuffer) - { - auto it = m_executingCommandBuffers.find(commandBuffer); - for (auto bufferIndex : it->second) - { - auto& buffer = m_buffers[bufferIndex]; - buffer.m_data.m_commandBufferCount--; - - // TODO: is this neccessary? 
- if (!buffer.m_data.IsLocked() && buffer.m_data.m_commandBufferCount == 0) - FreeBuffer(bufferIndex); - } - - m_executingCommandBuffers.erase(it); - } - - MTL::Buffer* GetBuffer(uint32 bufferIndex) - { - cemu_assert_debug(m_activeCommandBuffer); - - auto& buffer = m_buffers[bufferIndex]; - if (buffer.m_data.m_commandBufferCount == 0 || buffer.m_data.m_lastCommandBuffer != m_activeCommandBuffer) - { - m_activeCommandBufferIt->second.push_back(bufferIndex); - buffer.m_data.m_commandBufferCount++; - buffer.m_data.m_lastCommandBuffer = m_activeCommandBuffer; - } - - return buffer.m_buffer; - } - - MTL::Buffer* GetBufferOutsideOfCommandBuffer(uint32 bufferIndex) - { - return m_buffers[bufferIndex].m_buffer; - } - - /* - MetalBufferAllocation GetBufferAllocation(size_t size) - { - if (!m_activeCommandBuffer) - throw std::runtime_error("No active command buffer when allocating a buffer!"); - - auto allocation = MetalBufferAllocator::GetBufferAllocation(size); - - auto& buffer = m_buffers[allocation.bufferIndex]; - if (buffer.m_commandBuffers.empty() || buffer.m_commandBuffers.back() != m_activeCommandBuffer) - buffer.m_commandBuffers.push_back(m_activeCommandBuffer); - - return allocation; - } - */ - - // For debugging - /* - void LogInfo() - { - debug_printf("BUFFERS:\n"); - for (auto& buffer : m_buffers) - { - debug_printf(" %p -> size: %lu, command buffers: %zu\n", buffer.m_buffer, buffer.m_buffer->length(), buffer.m_data.m_commandBuffers.size()); - uint32 same = 0; - uint32 completed = 0; - for (uint32 i = 0; i < buffer.m_data.m_commandBuffers.size(); i++) - { - if (m_mtlr->CommandBufferCompleted(buffer.m_data.m_commandBuffers[i])) - completed++; - for (uint32 j = 0; j < buffer.m_data.m_commandBuffers.size(); j++) - { - if (i != j && buffer.m_data.m_commandBuffers[i] == buffer.m_data.m_commandBuffers[j]) - same++; - } - } - debug_printf(" same: %u\n", same); - debug_printf(" completed: %u\n", completed); - - debug_printf(" FREE RANGES:\n"); - for (auto& range : 
buffer.m_freeRanges) - { - debug_printf(" offset: %zu, size: %zu\n", range.offset, range.size); - } - } - } - */ + void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const; private: - MTL::CommandBuffer* m_activeCommandBuffer = nullptr; + void allocateAdditionalUploadBuffer(uint32 sizeRequiredForAlloc); + void addUploadBufferSyncPoint(AllocatorBuffer_t& buffer, uint32 offset); - std::map> m_executingCommandBuffers; - std::map>::iterator m_activeCommandBufferIt; + const class MetalRenderer* m_mtlr; - uint16 m_framesSinceBackBufferAccess = 0; + MTL::ResourceOptions m_options; + const uint32 m_minimumBufferAllocSize; + + std::vector m_buffers; +}; + +// heap style allocator with released memory being freed after the current command buffer finishes +class MetalSynchronizedHeapAllocator +{ + struct TrackedAllocation + { + TrackedAllocation(CHAddr allocation) : allocation(allocation) {}; + CHAddr allocation; + }; + + public: + MetalSynchronizedHeapAllocator(class MetalRenderer* mtlRenderer, MTL::ResourceOptions options, size_t minimumBufferAllocSize) : m_mtlr(mtlRenderer), m_chunkedHeap(m_mtlr, options, minimumBufferAllocSize) {} + MetalSynchronizedHeapAllocator(const MetalSynchronizedHeapAllocator&) = delete; // disallow copy + + struct AllocatorReservation + { + MTL::Buffer* mtlBuffer; + uint8* memPtr; + uint32 bufferOffset; + uint32 size; + uint32 bufferIndex; + }; + + AllocatorReservation* AllocateBufferMemory(uint32 size, uint32 alignment); + void FreeReservation(AllocatorReservation* uploadReservation); + void FlushReservation(AllocatorReservation* uploadReservation); + + void CleanupBuffer(MTL::CommandBuffer* latestFinishedCommandBuffer); + + void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const; + private: + const class MetalRenderer* m_mtlr; + MetalBufferChunkedHeap m_chunkedHeap; + // allocations + std::vector m_activeAllocations; + MemoryPool m_poolAllocatorReservation{32}; + // release queue + 
std::unordered_map> m_releaseQueue; }; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp index 8b9ac89f..7b1dd53f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp @@ -73,20 +73,15 @@ void MetalMemoryManager::UploadToBufferCache(const void* data, size_t offset, si if (m_bufferCacheMode == BufferCacheMode::DevicePrivate) { - auto allocation = m_tempBufferAllocator.GetBufferAllocation(size); - auto buffer = m_tempBufferAllocator.GetBufferOutsideOfCommandBuffer(allocation.bufferIndex); - memcpy((uint8*)buffer->contents() + allocation.offset, data, size); + auto blitCommandEncoder = m_mtlr->GetBlitCommandEncoder(); - // Lock the buffer to make sure it's not deallocated before the copy is done - m_tempBufferAllocator.LockBuffer(allocation.bufferIndex); + auto allocation = m_stagingAllocator.AllocateBufferMemory(size, 1); + memcpy(allocation.memPtr, data, size); + m_stagingAllocator.FlushReservation(allocation); - m_mtlr->CopyBufferToBuffer(buffer, allocation.offset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); + blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size); - // Make sure the buffer has the right command buffer - m_tempBufferAllocator.GetBuffer(allocation.bufferIndex); // TODO: make a helper function for this - - // We can now safely unlock the buffer - m_tempBufferAllocator.UnlockBuffer(allocation.bufferIndex); + //m_mtlr->CopyBufferToBuffer(allocation.mtlBuffer, allocation.bufferOffset, m_bufferCache, offset, size, ALL_MTL_RENDER_STAGES, ALL_MTL_RENDER_STAGES); } else { diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h index 3d70e0db..4e55fa6f 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h @@ -7,22 +7,17 @@ class MetalMemoryManager { public: - MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer, m_mtlr->GetOptimalBufferStorageMode()), m_framePersistentBufferAllocator(metalRenderer, MTL::ResourceStorageModePrivate), m_tempBufferAllocator(metalRenderer) {} + MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_stagingAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 32u * 1024 * 1024), m_indexAllocator(m_mtlr, m_mtlr->GetOptimalBufferStorageMode(), 4u * 1024 * 1024) {} ~MetalMemoryManager(); - MetalDefaultBufferAllocator& GetBufferAllocator() + MetalSynchronizedRingAllocator& GetStagingAllocator() { - return m_bufferAllocator; + return m_stagingAllocator; } - MetalDefaultBufferAllocator& GetFramePersistentBufferAllocator() + MetalSynchronizedHeapAllocator& GetIndexAllocator() { - return m_framePersistentBufferAllocator; - } - - MetalTemporaryBufferAllocator& GetTemporaryBufferAllocator() - { - return m_tempBufferAllocator; + return m_indexAllocator; } MTL::Buffer* GetBufferCache() @@ -30,6 +25,12 @@ public: return m_bufferCache; } + void CleanupBuffers(MTL::CommandBuffer* latestFinishedCommandBuffer) + { + m_stagingAllocator.CleanupBuffer(latestFinishedCommandBuffer); + m_indexAllocator.CleanupBuffer(latestFinishedCommandBuffer); + } + // Texture upload buffer void* AcquireTextureUploadBuffer(size_t size); void ReleaseTextureUploadBuffer(uint8* mem); @@ -65,9 +66,8 @@ private: std::vector m_textureUploadBuffer; - MetalDefaultBufferAllocator m_bufferAllocator; - MetalDefaultBufferAllocator m_framePersistentBufferAllocator; - MetalTemporaryBufferAllocator m_tempBufferAllocator; + MetalSynchronizedRingAllocator m_stagingAllocator; + MetalSynchronizedHeapAllocator m_indexAllocator; MTL::Buffer* m_bufferCache = nullptr; BufferCacheMode m_bufferCacheMode; diff --git 
a/src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h index cb65162e..bdbaa84b 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPerformanceMonitor.h @@ -3,8 +3,6 @@ class MetalPerformanceMonitor { public: - size_t m_bufferAllocatorMemory = 0; - // Per frame data uint32 m_commandBuffers = 0; uint32 m_renderPasses = 0; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 83c39b5c..61e5c94a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -16,13 +16,12 @@ #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/Core/LatteIndices.h" -#include "Cemu/Logging/CemuDebugLogging.h" +#include "Cafe/HW/Latte/Core/LatteBufferCache.h" #include "Cemu/Logging/CemuLogging.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/LatteConst.h" -#include "Common/precompiled.h" +#include "HW/Latte/Renderer/Metal/MetalBufferAllocator.h" #include "HW/Latte/Renderer/Metal/MetalCommon.h" -#include "Metal/MTLCaptureManager.hpp" #include "config/CemuConfig.h" #include "gui/guiWrapper.h" @@ -191,6 +190,7 @@ MetalRenderer::MetalRenderer() utilityLibrary->release(); // HACK: for some reason, this variable ends up being initialized to some garbage data, even though its declared as bool m_captureFrame = false; + m_occlusionQuery.m_lastCommandBuffer = nullptr; m_captureFrame = false; } @@ -302,12 +302,6 @@ void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) // Reset the command buffers (they are released by TemporaryBufferAllocator) CommitCommandBuffer(); - // Release frame persistent buffers - m_memoryManager->GetFramePersistentBufferAllocator().ResetAllocations(); - - // Unlock all temporary buffers - m_memoryManager->GetTemporaryBufferAllocator().EndFrame(); - // Debug 
m_performanceMonitor.ResetPerFrameData(); @@ -593,7 +587,6 @@ void MetalRenderer::AppendOverlayDebugInfo() ImGui::Text("--- Metal info ---"); ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize()); - ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024); ImGui::Text("--- Metal info (per frame) ---"); ImGui::Text("Command buffers %u", m_performanceMonitor.m_commandBuffers); @@ -601,6 +594,31 @@ void MetalRenderer::AppendOverlayDebugInfo() ImGui::Text("Clears %u", m_performanceMonitor.m_clears); ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws); ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans); + + ImGui::Text("--- Cache debug info ---"); + + uint32 bufferCacheHeapSize = 0; + uint32 bufferCacheAllocationSize = 0; + uint32 bufferCacheNumAllocations = 0; + + LatteBufferCache_getStats(bufferCacheHeapSize, bufferCacheAllocationSize, bufferCacheNumAllocations); + + ImGui::Text("Buffer"); + ImGui::SameLine(60.0f); + ImGui::Text("%06uKB / %06uKB Allocs: %u", (uint32)(bufferCacheAllocationSize + 1023) / 1024, ((uint32)bufferCacheHeapSize + 1023) / 1024, (uint32)bufferCacheNumAllocations); + + uint32 numBuffers; + size_t totalSize, freeSize; + + m_memoryManager->GetStagingAllocator().GetStats(numBuffers, totalSize, freeSize); + ImGui::Text("Staging"); + ImGui::SameLine(60.0f); + ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers); + + m_memoryManager->GetIndexAllocator().GetStats(numBuffers, totalSize, freeSize); + ImGui::Text("Index"); + ImGui::SameLine(60.0f); + ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers); } void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float 
height, float nearZ, float farZ, bool halfZ) @@ -682,17 +700,17 @@ void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, s auto blitCommandEncoder = GetBlitCommandEncoder(); // Allocate a temporary buffer - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - auto allocation = bufferAllocator.GetBufferAllocation(compressedImageSize); - auto buffer = bufferAllocator.GetBuffer(allocation.bufferIndex); + auto& bufferAllocator = m_memoryManager->GetStagingAllocator(); + auto allocation = bufferAllocator.AllocateBufferMemory(compressedImageSize, 1); + bufferAllocator.FlushReservation(allocation); // Copy the data to the temporary buffer - memcpy(allocation.data, pixelData, compressedImageSize); + memcpy(allocation.memPtr, pixelData, compressedImageSize); //buffer->didModifyRange(NS::Range(allocation.offset, allocation.size)); // TODO: specify blit options when copying to a depth stencil texture? // Copy the data from the temporary buffer to the texture - blitCommandEncoder->copyFromBuffer(buffer, allocation.offset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ)); + blitCommandEncoder->copyFromBuffer(allocation.mtlBuffer, allocation.bufferOffset, bytesPerRow, 0, MTL::Size(width, height, 1), textureMtl->GetTexture(), sliceIndex, mipIndex, MTL::Origin(0, 0, offsetZ)); //} } @@ -1067,9 +1085,9 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 hostIndexCount; uint32 indexMin = 0; uint32 indexMax = 0; - uint32 indexBufferOffset = 0; - uint32 indexBufferIndex = 0; - LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); + Renderer::IndexAllocation indexAllocation; + LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, 
indexMax, hostIndexType, hostIndexCount, indexAllocation); + auto indexAllocationMtl = static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(indexAllocation.rendererInternal); // Buffer cache if (m_memoryManager->UseHostMemoryForCache()) @@ -1308,20 +1326,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 BindStageResources(renderCommandEncoder, pixelShader, usesGeometryShader); // Draw - MTL::Buffer* indexBuffer = nullptr; - if (hostIndexType != INDEX_TYPE::NONE) - { - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - indexBuffer = bufferAllocator.GetBuffer(indexBufferIndex); - - // We have already retrieved the buffer, no need for it to be locked anymore - bufferAllocator.UnlockBuffer(indexBufferIndex); - } - if (usesGeometryShader) { - if (indexBuffer) - SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexBuffer, indexBufferOffset, vertexShader->resourceMapping.indexBufferBinding); + if (hostIndexType != INDEX_TYPE::NONE) + SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_OBJECT, indexAllocationMtl->mtlBuffer, indexAllocationMtl->bufferOffset, vertexShader->resourceMapping.indexBufferBinding); uint8 hostIndexTypeU8 = (uint8)hostIndexType; renderCommandEncoder->setObjectBytes(&hostIndexTypeU8, sizeof(hostIndexTypeU8), vertexShader->resourceMapping.indexTypeBinding); @@ -1349,10 +1357,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 } else { - if (indexBuffer) + if (hostIndexType != INDEX_TYPE::NONE) { auto mtlIndexType = GetMtlIndexType(hostIndexType); - renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); + renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexAllocationMtl->mtlBuffer, indexAllocationMtl->bufferOffset, instanceCount, baseVertex, baseInstance); } else { @@ -1492,29 +1500,21 @@ void MetalRenderer::draw_handleSpecialState5()
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); } -void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) +Renderer::IndexAllocation MetalRenderer::indexData_reserveIndexMemory(uint32 size) { - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); - auto allocation = bufferAllocator.GetBufferAllocation(size); - offset = allocation.offset; - bufferIndex = allocation.bufferIndex; + auto allocation = m_memoryManager->GetIndexAllocator().AllocateBufferMemory(size, 128); - // Lock the buffer so that it doesn't get released - bufferAllocator.LockBuffer(allocation.bufferIndex); - - return allocation.data; + return {allocation->memPtr, allocation}; } -void MetalRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) +void MetalRenderer::indexData_releaseIndexMemory(IndexAllocation& allocation) { - // Do nothing - /* - if (!HasUnifiedMemory()) - { - auto buffer = m_memoryManager->GetTemporaryBufferAllocator().GetBufferOutsideOfCommandBuffer(bufferIndex); - buffer->didModifyRange(NS::Range(offset, size)); - } - */ + m_memoryManager->GetIndexAllocator().FreeReservation(static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(allocation.rendererInternal)); +} + +void MetalRenderer::indexData_uploadIndexMemory(IndexAllocation& allocation) +{ + m_memoryManager->GetIndexAllocator().FlushReservation(static_cast<MetalSynchronizedHeapAllocator::AllocatorReservation*>(allocation.rendererInternal)); } LatteQueryObject* MetalRenderer::occlusionQuery_create() { @@ -1652,9 +1652,6 @@ MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() m_recordedDrawcalls = 0; m_commitTreshold = m_defaultCommitTreshlod; - // Notify memory manager about the new command buffer - m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(mtlCommandBuffer); - // Debug m_performanceMonitor.m_commandBuffers++; @@ -1835,8 +1832,6 @@ void MetalRenderer::CommitCommandBuffer() m_executingCommandBuffers.push_back(mtlCommandBuffer); -
m_memoryManager->GetTemporaryBufferAllocator().SetActiveCommandBuffer(nullptr); - // Debug //m_commandQueue->insertDebugCaptureBoundary(); } @@ -1845,26 +1840,20 @@ void MetalRenderer::CommitCommandBuffer() void MetalRenderer::ProcessFinishedCommandBuffers() { // Check for finished command buffers - bool atLeastOneCompleted = false; for (auto it = m_executingCommandBuffers.begin(); it != m_executingCommandBuffers.end();) { auto commandBuffer = *it; if (CommandBufferCompleted(commandBuffer)) { - m_memoryManager->GetTemporaryBufferAllocator().CommandBufferFinished(commandBuffer); + m_memoryManager->CleanupBuffers(commandBuffer); commandBuffer->release(); it = m_executingCommandBuffers.erase(it); - atLeastOneCompleted = true; } else { ++it; } } - - // Invalidate indices if at least one command buffer has completed - if (atLeastOneCompleted) - LatteIndices_invalidateAll(); } bool MetalRenderer::AcquireDrawable(bool mainWindow) @@ -2102,15 +2091,13 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE } } - auto& bufferAllocator = m_memoryManager->GetTemporaryBufferAllocator(); size_t size = shader->uniform.uniformRangeSize; - auto supportBuffer = bufferAllocator.GetBufferAllocation(size); - memcpy(supportBuffer.data, supportBufferData, size); - auto buffer = bufferAllocator.GetBuffer(supportBuffer.bufferIndex); - //if (!HasUnifiedMemory()) - // buffer->didModifyRange(NS::Range(supportBuffer.offset, size)); + auto& bufferAllocator = m_memoryManager->GetStagingAllocator(); + auto allocation = bufferAllocator.AllocateBufferMemory(size, 1); + memcpy(allocation.memPtr, supportBufferData, size); + bufferAllocator.FlushReservation(allocation); - SetBuffer(renderCommandEncoder, mtlShaderType, buffer, supportBuffer.offset, shader->resourceMapping.uniformVarsBufferBindingPoint); + SetBuffer(renderCommandEncoder, mtlShaderType, allocation.mtlBuffer, allocation.bufferOffset, shader->resourceMapping.uniformVarsBufferBindingPoint); } // Uniform buffers 
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 760ad6bc..04c63be8 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -7,19 +7,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalOutputShaderCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalAttachmentsInfo.h" -struct MetalBufferAllocation -{ - void* data; - uint32 bufferIndex; - size_t offset = INVALID_OFFSET; - size_t size; - - bool IsValid() const - { - return offset != INVALID_OFFSET; - } -}; - enum MetalGeneralShaderType { METAL_GENERAL_SHADER_TYPE_VERTEX, @@ -271,8 +258,9 @@ public: void draw_handleSpecialState5(); // index - void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override; - void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override; + IndexAllocation indexData_reserveIndexMemory(uint32 size) override; + void indexData_releaseIndexMemory(IndexAllocation& allocation) override; + void indexData_uploadIndexMemory(IndexAllocation& allocation) override; // occlusion queries LatteQueryObject* occlusionQuery_create() override; @@ -294,14 +282,14 @@ public: return (m_currentCommandBuffer.m_commandBuffer && !m_currentCommandBuffer.m_commited); } - MTL::CommandBuffer* GetCurrentCommandBuffer() + MTL::CommandBuffer* GetCurrentCommandBuffer() const { cemu_assert_debug(m_currentCommandBuffer.m_commandBuffer); return m_currentCommandBuffer.m_commandBuffer; } - MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted() + MTL::CommandBuffer* GetAndRetainCurrentCommandBufferIfNotCompleted() const { // The command buffer has been commited and has finished execution if (m_currentCommandBuffer.m_commited && m_executingCommandBuffers.size() == 0) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 9e6d3b9c..07073e08 100644 --- 
a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -15,8 +15,8 @@ #define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512) static bool s_isLoadingShadersMtl{false}; -static bool s_hasRAMFilesystem{false}; -class FileCache* s_airCache{nullptr}; +//static bool s_hasRAMFilesystem{false}; +//class FileCache* s_airCache{nullptr}; extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; @@ -190,6 +190,7 @@ void RendererShaderMtl::ShaderCacheLoading_end() void RendererShaderMtl::ShaderCacheLoading_Close() { // Close the AIR cache + /* if (s_airCache) { delete s_airCache; @@ -197,7 +198,6 @@ void RendererShaderMtl::ShaderCacheLoading_Close() } // Close RAM filesystem - /* if (s_hasRAMFilesystem) executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH); */