diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp index e77e4715..5374126a 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.cpp @@ -2,7 +2,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "Metal/MTLTexture.hpp" uint32 LatteTextureMtl_AdjustTextureCompSel(Latte::E_GX2SURFFMT format, uint32 compSel) { @@ -159,21 +158,21 @@ MTL::Texture* LatteTextureViewMtl::CreateSwizzledView(uint32 gpuSamplerSwizzle) uint32 baseLevel = firstMip; uint32 levelCount = this->numMip; - uint32 baseLayer = 0; - uint32 layerCount = 1; - - // TODO: check if base texture is 3D texture as well? + uint32 baseLayer; + uint32 layerCount; + // TODO: check if base texture is 3D texture as well if (textureType == MTL::TextureType3D) { cemu_assert_debug(firstMip == 0); cemu_assert_debug(this->numSlice == baseTexture->depth); + baseLayer = 0; + layerCount = 1; } - // Cube array needs to have layer count multiple of 6 as opposed to when creating a texture - else if (textureType == MTL::TextureTypeCubeArray || textureType == MTL::TextureType2DArray) - { - baseLayer = firstSlice; + else + { + baseLayer = firstSlice; layerCount = this->numSlice; - } + } MTL::TextureSwizzleChannels swizzle; swizzle.red = GetMtlTextureSwizzle(compSelR); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index 4a6c9953..89029512 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -125,9 +125,18 @@ MetalRenderer::MetalRenderer() if (error) { cemuLog_log(LogType::Force, "failed to create utility library (error: {})", error->localizedDescription()->utf8String()); - return; } + // Pipelines + MTL::Function* 
vertexFullscreenFunction = utilityLibrary->newFunction(ToNSString("vertexFullscreen")); + MTL::Function* fragmentCopyDepthToColorFunction = utilityLibrary->newFunction(ToNSString("fragmentCopyDepthToColor")); + + m_copyDepthToColorDesc = MTL::RenderPipelineDescriptor::alloc()->init(); + m_copyDepthToColorDesc->setVertexFunction(vertexFullscreenFunction); + m_copyDepthToColorDesc->setFragmentFunction(fragmentCopyDepthToColorFunction); + vertexFullscreenFunction->release(); + fragmentCopyDepthToColorFunction->release(); + // Void vertex pipelines if (m_isAppleGPU) m_copyBufferToBufferPipeline = new MetalVoidVertexPipeline(this, utilityLibrary, "vertexCopyBufferToBuffer"); @@ -142,8 +151,9 @@ MetalRenderer::~MetalRenderer() //delete m_copyTextureToTexturePipeline; //delete m_restrideBufferPipeline; - //m_presentPipelineLinear->release(); - //m_presentPipelineSRGB->release(); + m_copyDepthToColorDesc->release(); + for (const auto [pixelFormat, pipeline] : m_copyDepthToColorPipelines) + pipeline->release(); delete m_outputShaderCache; delete m_pipelineCache; @@ -1348,14 +1358,44 @@ void MetalRenderer::draw_handleSpecialState5() LatteTextureView* colorBuffer = LatteMRT::GetColorAttachment(0); LatteTextureView* depthBuffer = LatteMRT::GetDepthAttachment(); + auto colorTextureMtl = static_cast<LatteTextureViewMtl*>(colorBuffer); + auto depthTextureMtl = static_cast<LatteTextureViewMtl*>(depthBuffer); sint32 vpWidth, vpHeight; LatteMRT::GetVirtualViewportDimensions(vpWidth, vpHeight); - surfaceCopy_copySurfaceWithFormatConversion( - depthBuffer->baseTexture, depthBuffer->firstMip, depthBuffer->firstSlice, - colorBuffer->baseTexture, colorBuffer->firstMip, colorBuffer->firstSlice, - vpWidth, vpHeight); + // Get the pipeline + MTL::PixelFormat colorPixelFormat = colorTextureMtl->GetRGBAView()->pixelFormat(); + auto& pipeline = m_copyDepthToColorPipelines[colorPixelFormat]; + if (!pipeline) + { + m_copyDepthToColorDesc->colorAttachments()->object(0)->setPixelFormat(colorPixelFormat); + + NS::Error* error = nullptr; + 
pipeline = m_device->newRenderPipelineState(m_copyDepthToColorDesc, &error); + if (error) + { + cemuLog_log(LogType::Force, "failed to create copy depth to color pipeline (error: {})", error->localizedDescription()->utf8String()); + } + } + + // Sadly, we need to end encoding to ensure that the depth data is up-to-date + EndEncoding(); + + // Copy depth to color + auto renderCommandEncoder = GetRenderCommandEncoder(); + + auto& encoderState = m_state.m_encoderState; + + renderCommandEncoder->setRenderPipelineState(pipeline); + // TODO: make a helper function for this + encoderState.m_renderPipelineState = pipeline; + SetTexture(renderCommandEncoder, METAL_SHADER_TYPE_FRAGMENT, depthTextureMtl->GetRGBAView(), GET_HELPER_TEXTURE_BINDING(0)); + // TODO: make a helper function for this + renderCommandEncoder->setFragmentBytes(&vpWidth, sizeof(sint32), GET_HELPER_BUFFER_BINDING(0)); + encoderState.m_buffers[METAL_SHADER_TYPE_FRAGMENT][GET_HELPER_BUFFER_BINDING(0)] = {nullptr}; + + renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); } void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h index 2f851489..3f508ae8 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h @@ -471,6 +471,10 @@ private: class MetalDepthStencilCache* m_depthStencilCache; class MetalSamplerCache* m_samplerCache; + // Pipelines + MTL::RenderPipelineDescriptor* m_copyDepthToColorDesc; + std::map<MTL::PixelFormat, MTL::RenderPipelineState*> m_copyDepthToColorPipelines; + // Void vertex pipelines class MetalVoidVertexPipeline* m_copyBufferToBufferPipeline; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h index 9fba1946..2041f4f8 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h +++ 
b/src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h @@ -8,24 +8,24 @@ using namespace metal; #define GET_BUFFER_BINDING(index) (28 + index) #define GET_TEXTURE_BINDING(index) (29 + index) -#define GET_SAMPLER_BINDING(index) (14 + index)\n +#define GET_SAMPLER_BINDING(index) (14 + index) + +constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)}; + +struct VertexOut { + float4 position [[position]]; + float2 texCoord; +}; + +vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) { + VertexOut out; + out.position = float4(positions[vid], 0.0, 1.0); + out.texCoord = positions[vid] * 0.5 + 0.5; + out.texCoord.y = 1.0 - out.texCoord.y; + + return out; +} -//constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)}; -// -//struct VertexOut { -// float4 position [[position]]; -// float2 texCoord; -//}; -// -//vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) { -// VertexOut out; -// out.position = float4(positions[vid], 0.0, 1.0); -// out.texCoord = positions[vid] * 0.5 + 0.5; -// out.texCoord.y = 1.0 - out.texCoord.y; -// -// return out; -//} -// //fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d tex [[texture(0)]], //sampler samplr [[sampler(0)]]) { // return tex.sample(samplr, in.texCoord); //} @@ -34,19 +34,18 @@ vertex void vertexCopyBufferToBuffer(uint vid [[vertex_id]], device uint8_t* src dst[vid] = src[vid]; } -//vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d src [[texture(GET_TEXTURE_BINDING(0))]], texture2d dst [[texture(GET_TEXTURE_BINDING(1))]], constant uint32_t& width [[buffer(GET_BUFFER_BINDING(0))]]) { -// uint2 coord = uint2(vid % width, vid / width); -// return dst.write(float4(src.read(coord).r, 0.0, 0.0, 0.0), coord); -//} - -struct RestrideParams { - uint oldStride; - uint newStride; -}; - -vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(GET_BUFFER_BINDING(0))]], device uint8_t* dst 
[[buffer(GET_BUFFER_BINDING(1))]], constant RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) { - for (uint32_t i = 0; i < params.oldStride; i++) { - dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; - } +fragment float4 fragmentCopyDepthToColor(VertexOut in [[stage_in]], texture2d<float, access::read> src [[texture(GET_TEXTURE_BINDING(0))]]) { + return float4(src.read(uint2(in.position.xy)).r, 0.0, 0.0, 0.0); } + +//struct RestrideParams { +// uint oldStride; +// uint newStride; +//}; + +//vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer//(GET_BUFFER_BINDING(0))]], device uint8_t* dst [[buffer(GET_BUFFER_BINDING(1))]], constant //RestrideParams& params [[buffer(GET_BUFFER_BINDING(2))]]) { +// for (uint32_t i = 0; i < params.oldStride; i++) { +// dst[vid * params.newStride + i] = src[vid * params.oldStride + i]; +// } +//} )";