do buffer cache uploading on the GPU

This commit is contained in:
Samuliak 2024-08-29 19:06:01 +02:00
parent 2403cf948a
commit 1cfb841b5f
3 changed files with 70 additions and 69 deletions

View file

@ -22,11 +22,6 @@
#include "HW/Latte/Renderer/Metal/MetalCommon.h"
#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "Metal/MTLCommandBuffer.hpp"
#include "Metal/MTLDevice.hpp"
#include "Metal/MTLRenderCommandEncoder.hpp"
#include "Metal/MTLRenderPass.hpp"
#include "Metal/MTLRenderPipeline.hpp"
#include "imgui.h"
#include <cstddef>
@ -780,31 +775,7 @@ void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32
void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size)
{
// Do the copy in a vertex shader on Apple GPUs
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
{
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
MTL::Resource* barrierBuffers[] = {m_xfbRingBuffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex);
renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState());
m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState();
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_xfbRingBuffer, srcOffset, GET_HELPER_BUFFER_BINDING(0));
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, m_memoryManager->GetBufferCache(), dstOffset, GET_HELPER_BUFFER_BINDING(1));
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size));
barrierBuffers[0] = m_memoryManager->GetBufferCache();
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh);
}
else
{
auto blitCommandEncoder = GetBlitCommandEncoder();
blitCommandEncoder->copyFromBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
}
CopyBufferToBuffer(m_xfbRingBuffer, srcOffset, m_memoryManager->GetBufferCache(), dstOffset, size);
}
void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size)
@ -945,9 +916,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
if (endRenderPass)
EndEncoding();
// Render pass
auto renderCommandEncoder = GetRenderCommandEncoder();
// Primitive type
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode);
@ -955,6 +923,22 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect);
// Index buffer
Renderer::INDEX_TYPE hostIndexType;
uint32 hostIndexCount;
uint32 indexMin = 0;
uint32 indexMax = 0;
uint32 indexBufferOffset = 0;
uint32 indexBufferIndex = 0;
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
// synchronize vertex and uniform cache and update buffer bindings
// We need to call this before getting the render command encoder, since it can cause buffer copies
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
// Render pass
auto renderCommandEncoder = GetRenderCommandEncoder();
// Depth stencil state
// Disable depth write when there is no depth attachment
@ -1120,18 +1104,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
// Resources
// Index buffer
Renderer::INDEX_TYPE hostIndexType;
uint32 hostIndexCount;
uint32 indexMin = 0;
uint32 indexMax = 0;
uint32 indexBufferOffset = 0;
uint32 indexBufferIndex = 0;
LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex);
// synchronize vertex and uniform cache and update buffer bindings
LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount);
// Vertex buffers
for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++)
{
@ -1851,6 +1823,37 @@ void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 s
EndEncoding();
}
void MetalRenderer::CopyBufferToBuffer(MTL::Buffer* src, uint32 srcOffset, MTL::Buffer* dst, uint32 dstOffset, uint32 size)
{
// Do the copy in a vertex shader on Apple GPUs
if (m_isAppleGPU && m_encoderType == MetalEncoderType::Render)
{
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_commandEncoder);
MTL::Resource* barrierBuffers[] = {src};
// TODO: let the caller choose the stages
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh, MTL::RenderStageVertex);
renderCommandEncoder->setRenderPipelineState(m_copyBufferToBufferPipeline->GetRenderPipelineState());
m_state.m_encoderState.m_renderPipelineState = m_copyBufferToBufferPipeline->GetRenderPipelineState();
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, src, srcOffset, GET_HELPER_BUFFER_BINDING(0));
SetBuffer(renderCommandEncoder, METAL_SHADER_TYPE_VERTEX, dst, dstOffset, GET_HELPER_BUFFER_BINDING(1));
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), NS::UInteger(size));
barrierBuffers[0] = dst;
// TODO: let the caller choose the stages
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex | MTL::RenderStageFragment | MTL::RenderStageObject | MTL::RenderStageMesh);
}
else
{
auto blitCommandEncoder = GetBlitCommandEncoder();
blitCommandEncoder->copyFromBuffer(src, srcOffset, dst, dstOffset, size);
}
}
void MetalRenderer::SwapBuffer(bool mainWindow)
{
auto& layer = GetLayer(mainWindow);