#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalSamplerCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h"

#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Common/precompiled.h"
#include "gui/guiWrapper.h"

// Draw-call budget used by draw_endSequence() to decide when to commit the command buffer
#define COMMIT_TRESHOLD 256

extern bool hasValidFramebufferAttached;

float supportBufferData[512 * 4];

// Creates the Metal device and command queue, the shared sampler states, the
// null textures, the caches, the readback / transform feedback buffers and the
// utility pipelines (present + hybrid compute) built from utilityShaderSource.
MetalRenderer::MetalRenderer()
{
    m_device = MTL::CreateSystemDefaultDevice();
    m_commandQueue = m_device->newCommandQueue();

    // Resources
    MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
#ifdef CEMU_DEBUG_ASSERT
    samplerDescriptor->setLabel(GetLabel("Nearest sampler state", samplerDescriptor));
#endif
    m_nearestSampler = m_device->newSamplerState(samplerDescriptor);

    // Reuse the same descriptor for the linear sampler
    samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear);
    samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear);
#ifdef CEMU_DEBUG_ASSERT
    samplerDescriptor->setLabel(GetLabel("Linear sampler state", samplerDescriptor));
#endif
    m_linearSampler = m_device->newSamplerState(samplerDescriptor);
    samplerDescriptor->release();

    // Null resources
    MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init();
    textureDescriptor->setTextureType(MTL::TextureType1D);
    textureDescriptor->setWidth(4);
    m_nullTexture1D = m_device->newTexture(textureDescriptor);
#ifdef CEMU_DEBUG_ASSERT
    m_nullTexture1D->setLabel(GetLabel("Null texture 1D", m_nullTexture1D));
#endif

    textureDescriptor->setTextureType(MTL::TextureType2D);
    textureDescriptor->setHeight(4);
    m_nullTexture2D = m_device->newTexture(textureDescriptor);
#ifdef CEMU_DEBUG_ASSERT
    m_nullTexture2D->setLabel(GetLabel("Null texture 2D", m_nullTexture2D));
#endif
    textureDescriptor->release();

    m_memoryManager = new MetalMemoryManager(this);
    m_pipelineCache = new MetalPipelineCache(this);
    m_depthStencilCache = new MetalDepthStencilCache(this);
    m_samplerCache = new MetalSamplerCache(this);

    // Texture readback
    m_readbackBuffer = m_device->newBuffer(TEXTURE_READBACK_SIZE, MTL::StorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
    m_readbackBuffer->setLabel(GetLabel("Texture readback buffer", m_readbackBuffer));
#endif

    // Transform feedback
    m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize(), MTL::StorageModeShared);
#ifdef CEMU_DEBUG_ASSERT
    m_xfbRingBuffer->setLabel(GetLabel("Transform feedback buffer", m_xfbRingBuffer));
#endif

    // Initialize state
    for (uint32 i = 0; i < METAL_SHADER_TYPE_TOTAL; i++)
    {
        for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
            m_state.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
    }

    // Utility shader library

    // Process the source first: drop the first and last character of the embedded
    // source, then prepend the Metal prelude and the binding helper macros
    std::string processedUtilityShaderSource = utilityShaderSource;
    processedUtilityShaderSource.pop_back();
    processedUtilityShaderSource.erase(processedUtilityShaderSource.begin());
    processedUtilityShaderSource = "#include <metal_stdlib>\nusing namespace metal;\n#define GET_BUFFER_BINDING(index) (27 + index)\n#define GET_TEXTURE_BINDING(index) (29 + index)\n#define GET_SAMPLER_BINDING(index) (14 + index)\n" + processedUtilityShaderSource;

    // Create the library
    NS::Error* error = nullptr;
    MTL::Library* utilityLibrary = m_device->newLibrary(ToNSString(processedUtilityShaderSource.c_str()), nullptr, &error);
    if (error)
    {
        debug_printf("failed to create utility library (error: %s)\n", error->localizedDescription()->utf8String());
        error->release();
        // The renderer cannot work without the utility library. Note that a bare
        // `throw;` with no exception in flight ends in std::terminate.
        throw;
    }

    // Present pipeline
    MTL::Function* presentVertexFunction = utilityLibrary->newFunction(ToNSString("vertexFullscreen"));
    MTL::Function* presentFragmentFunction = utilityLibrary->newFunction(ToNSString("fragmentPresent"));

    MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
    renderPipelineDescriptor->setVertexFunction(presentVertexFunction);
    renderPipelineDescriptor->setFragmentFunction(presentFragmentFunction);
    presentVertexFunction->release();
    presentFragmentFunction->release();

    // Linear (non-sRGB) variant
    error = nullptr;
    renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
#ifdef CEMU_DEBUG_ASSERT
    renderPipelineDescriptor->setLabel(GetLabel("Present pipeline linear", renderPipelineDescriptor));
#endif
    m_presentPipelineLinear = m_device->newRenderPipelineState(renderPipelineDescriptor, &error);
    if (error)
    {
        debug_printf("failed to create linear present pipeline (error: %s)\n", error->localizedDescription()->utf8String());
        error->release();
    }

    // sRGB variant, same descriptor with a different attachment format
    error = nullptr;
    renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm_sRGB);
#ifdef CEMU_DEBUG_ASSERT
    renderPipelineDescriptor->setLabel(GetLabel("Present pipeline sRGB", renderPipelineDescriptor));
#endif
    m_presentPipelineSRGB = m_device->newRenderPipelineState(renderPipelineDescriptor, &error);
    renderPipelineDescriptor->release();
    if (error)
    {
        debug_printf("failed to create sRGB present pipeline (error: %s)\n", error->localizedDescription()->utf8String());
        error->release();
    }

    // Hybrid pipelines
    m_copyTextureToTexturePipeline = new MetalHybridComputePipeline(this, utilityLibrary, "vertexCopyTextureToTexture", "kernelCopyTextureToTexture");
    m_restrideBufferPipeline = new MetalHybridComputePipeline(this, utilityLibrary, "vertexRestrideBuffer", "kernelRestrideBuffer");
    utilityLibrary->release();

    m_memoryManager->SetRestrideBufferPipeline(m_restrideBufferPipeline);
}

// Releases everything the constructor created. The device is released last.
MetalRenderer::~MetalRenderer()
{
    delete m_copyTextureToTexturePipeline;
    delete m_restrideBufferPipeline;

    m_presentPipelineLinear->release();
    m_presentPipelineSRGB->release();

    delete m_pipelineCache;
    delete m_depthStencilCache;
    delete m_samplerCache;
    delete m_memoryManager;

    m_nearestSampler->release();
    m_linearSampler->release();

    // Created with newTexture()/newBuffer() in the constructor, so they are owned here
    m_nullTexture1D->release();
    m_nullTexture2D->release();

    m_readbackBuffer->release();
    m_xfbRingBuffer->release();

    m_commandQueue->release();
    m_device->release();
}

// Creates the Metal layer for the main window and sizes its drawable.
// TODO: don't ignore "mainWindow" argument
void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
{
    const auto& windowInfo = gui_getWindowInfo().window_main;

    // CreateMetalLayer also receives m_layerScaleX / m_layerScaleY; the drawable
    // size below is the window size multiplied by those scale factors
    m_metalLayer = (CA::MetalLayer*)CreateMetalLayer(windowInfo.handle, m_layerScaleX, m_layerScaleY);
    m_metalLayer->setDevice(m_device);
    m_metalLayer->setDrawableSize(CGSize{(float)size.x * m_layerScaleX, (float)size.y * m_layerScaleY});
}

// Updates the drawable size of the existing layer after a window resize.
// TODO: don't ignore "mainWindow" argument
void MetalRenderer::ResizeLayer(const Vector2i& size, bool mainWindow)
{
    m_metalLayer->setDrawableSize(CGSize{(float)size.x * m_layerScaleX, (float)size.y * m_layerScaleY});
}

// Forwards to the base renderer initialization.
void MetalRenderer::Initialize()
{
    Renderer::Initialize();
}

// Shuts down the base renderer, then commits the current command buffer.
void MetalRenderer::Shutdown()
{
    Renderer::Shutdown();
    CommitCommandBuffer();
}

// TODO: what should this do?
bool MetalRenderer::IsPadWindowActive() { //debug_printf("MetalRenderer::IsPadWindowActive not implemented\n"); return false; } bool MetalRenderer::GetVRAMInfo(int& usageInMB, int& totalInMB) const { debug_printf("MetalRenderer::GetVRAMInfo not implemented\n"); usageInMB = 1024; totalInMB = 1024; return false; } void MetalRenderer::ClearColorbuffer(bool padView) { if (!AcquireNextDrawable(!padView)) return; ClearColorTextureInternal(m_drawable->texture(), 0, 0, 0.0f, 0.0f, 0.0f, 0.0f); } void MetalRenderer::DrawEmptyFrame(bool mainWindow) { if (!BeginFrame(mainWindow)) return; SwapBuffers(mainWindow, !mainWindow); } void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) { if (m_drawable) { auto commandBuffer = GetCommandBuffer(); commandBuffer->presentDrawable(m_drawable); } else { debug_printf("skipped present!\n"); } m_drawable = nullptr; // Release all the command buffers CommitCommandBuffer(); for (uint32 i = 0; i < m_commandBuffers.size(); i++) m_commandBuffers[i].m_commandBuffer->release(); m_commandBuffers.clear(); // Reset temporary buffers m_memoryManager->ResetTemporaryBuffers(); } // TODO: use `shader` for drawing void MetalRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter, sint32 imageX, sint32 imageY, sint32 imageWidth, sint32 imageHeight, bool padView, bool clearBackground) { if (!AcquireNextDrawable(!padView)) return; MTL::Texture* presentTexture = static_cast(texView)->GetRGBAView(); // Create render pass MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); auto colorAttachment = renderPassDescriptor->colorAttachments()->object(0); colorAttachment->setTexture(m_drawable->texture()); // TODO: shouldn't it be LoadActionLoad when not clearing? colorAttachment->setLoadAction(clearBackground ? 
MTL::LoadActionClear : MTL::LoadActionDontCare); colorAttachment->setStoreAction(MTL::StoreActionStore); auto renderCommandEncoder = GetTemporaryRenderCommandEncoder(renderPassDescriptor); renderPassDescriptor->release(); // Draw to Metal layer renderCommandEncoder->setRenderPipelineState(m_state.m_usesSRGB ? m_presentPipelineSRGB : m_presentPipelineLinear); renderCommandEncoder->setFragmentTexture(presentTexture, 0); renderCommandEncoder->setFragmentSamplerState((useLinearTexFilter ? m_linearSampler : m_nearestSampler), 0); renderCommandEncoder->setViewport(MTL::Viewport{(double)imageX, (double)imageY, (double)imageWidth, (double)imageHeight, 0.0, 1.0}); renderCommandEncoder->setScissorRect(MTL::ScissorRect{(uint32)imageX, (uint32)imageY, (uint32)imageWidth, (uint32)imageHeight}); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); EndEncoding(); } bool MetalRenderer::BeginFrame(bool mainWindow) { return AcquireNextDrawable(mainWindow); } void MetalRenderer::Flush(bool waitIdle) { // TODO: commit if commit on idle is requested if (m_recordedDrawcalls > 0) CommitCommandBuffer(); if (waitIdle) { // TODO: shouldn't we wait for all command buffers? 
WaitForCommandBufferCompletion(GetCurrentCommandBuffer()); } } void MetalRenderer::NotifyLatteCommandProcessorIdle() { // TODO: commit if commit on idle is requested //CommitCommandBuffer(); } void MetalRenderer::AppendOverlayDebugInfo() { debug_printf("MetalRenderer::AppendOverlayDebugInfo not implemented\n"); } // TODO: halfZ void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ) { m_state.m_viewport = MTL::Viewport{x, y, width, height, nearZ, farZ}; } void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight) { m_state.m_scissor = MTL::ScissorRect{(uint32)scissorX, (uint32)scissorY, (uint32)scissorWidth, (uint32)scissorHeight}; } LatteCachedFBO* MetalRenderer::rendertarget_createCachedFBO(uint64 key) { return new CachedFBOMtl(key); } void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* cfbo) { if (cfbo == (LatteCachedFBO*)m_state.m_activeFBO) m_state.m_activeFBO = nullptr; } void MetalRenderer::rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo) { m_state.m_activeFBO = (CachedFBOMtl*)cfbo; } void* MetalRenderer::texture_acquireTextureUploadBuffer(uint32 size) { return m_memoryManager->GetTextureUploadBuffer(size); } void MetalRenderer::texture_releaseTextureUploadBuffer(uint8* mem) { // TODO: should the texture buffer get released? 
} TextureDecoder* MetalRenderer::texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) { return GetMtlTextureDecoder(format, isDepth); } void MetalRenderer::texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex) { if (hostTexture->isDepth) { texture_clearDepthSlice(hostTexture, sliceIndex, mipIndex, true, hostTexture->hasStencil, 0.0f, 0); } else { texture_clearColorSlice(hostTexture, sliceIndex, mipIndex, 0.0f, 0.0f, 0.0f, 0.0f); } } void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize) { auto textureMtl = (LatteTextureMtl*)hostTexture; uint32 offsetZ = 0; if (textureMtl->Is3DTexture()) { offsetZ = sliceIndex; sliceIndex = 0; } size_t bytesPerRow = GetMtlTextureBytesPerRow(textureMtl->GetFormat(), textureMtl->IsDepth(), width); // No need to calculate bytesPerImage for 3D textures, since we always load just one slice //size_t bytesPerImage = GetMtlTextureBytesPerImage(textureMtl->GetFormat(), textureMtl->IsDepth(), height, bytesPerRow); textureMtl->GetTexture()->replaceRegion(MTL::Region(0, 0, offsetZ, width, height, 1), mipIndex, sliceIndex, pixelData, bytesPerRow, 0); } void MetalRenderer::texture_clearColorSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a) { auto mtlTexture = static_cast(hostTexture)->GetTexture(); ClearColorTextureInternal(mtlTexture, sliceIndex, mipIndex, r, g, b, a); } void MetalRenderer::texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sliceIndex, sint32 mipIndex, bool clearDepth, bool clearStencil, float depthValue, uint32 stencilValue) { auto mtlTexture = static_cast(hostTexture)->GetTexture(); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); if (clearDepth) { auto depthAttachment = 
renderPassDescriptor->depthAttachment(); depthAttachment->setTexture(mtlTexture); depthAttachment->setClearDepth(depthValue); depthAttachment->setLoadAction(MTL::LoadActionClear); depthAttachment->setStoreAction(MTL::StoreActionStore); depthAttachment->setSlice(sliceIndex); depthAttachment->setLevel(mipIndex); } if (clearStencil && GetMtlPixelFormatInfo(hostTexture->format, true).hasStencil) { auto stencilAttachment = renderPassDescriptor->stencilAttachment(); stencilAttachment->setTexture(mtlTexture); stencilAttachment->setClearStencil(stencilValue); stencilAttachment->setLoadAction(MTL::LoadActionClear); stencilAttachment->setStoreAction(MTL::StoreActionStore); stencilAttachment->setSlice(sliceIndex); stencilAttachment->setLevel(mipIndex); } GetTemporaryRenderCommandEncoder(renderPassDescriptor); renderPassDescriptor->release(); EndEncoding(); } LatteTexture* MetalRenderer::texture_createTextureEx(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth) { return new LatteTextureMtl(this, dim, physAddress, physMipAddress, format, width, height, depth, pitch, mipLevels, swizzle, tileMode, isDepth); } void MetalRenderer::texture_setLatteTexture(LatteTextureView* textureView, uint32 textureUnit) { m_state.m_textures[textureUnit] = static_cast(textureView); } void MetalRenderer::texture_copyImageSubData(LatteTexture* src, sint32 srcMip, sint32 effectiveSrcX, sint32 effectiveSrcY, sint32 srcSlice, LatteTexture* dst, sint32 dstMip, sint32 effectiveDstX, sint32 effectiveDstY, sint32 dstSlice, sint32 effectiveCopyWidth, sint32 effectiveCopyHeight, sint32 srcDepth_) { auto blitCommandEncoder = GetBlitCommandEncoder(); auto mtlSrc = static_cast(src)->GetTexture(); auto mtlDst = static_cast(dst)->GetTexture(); uint32 srcBaseLayer = 0; uint32 dstBaseLayer = 0; uint32 srcOffsetZ = 0; uint32 dstOffsetZ = 0; uint32 
srcLayerCount = 1; uint32 dstLayerCount = 1; uint32 srcDepth = 1; uint32 dstDepth = 1; if (src->Is3DTexture()) { srcOffsetZ = srcSlice; srcDepth = srcDepth_; } else { srcBaseLayer = srcSlice; srcLayerCount = srcDepth_; } if (dst->Is3DTexture()) { dstOffsetZ = dstSlice; dstDepth = srcDepth_; } else { dstBaseLayer = dstSlice; dstLayerCount = srcDepth_; } // If copying whole textures, we can do a more efficient copy if (effectiveSrcX == 0 && effectiveSrcY == 0 && effectiveDstX == 0 && effectiveDstY == 0 && srcOffsetZ == 0 && dstOffsetZ == 0 && effectiveCopyWidth == src->GetMipWidth(srcMip) && effectiveCopyHeight == src->GetMipHeight(srcMip) && srcDepth == src->GetMipDepth(srcMip) && effectiveCopyWidth == dst->GetMipWidth(dstMip) && effectiveCopyHeight == dst->GetMipHeight(dstMip) && dstDepth == dst->GetMipDepth(dstMip) && srcLayerCount == dstLayerCount) { blitCommandEncoder->copyFromTexture(mtlSrc, srcBaseLayer, srcMip, mtlDst, dstBaseLayer, dstMip, srcLayerCount, 1); } else { if (srcLayerCount == dstLayerCount) { for (uint32 i = 0; i < srcLayerCount; i++) { blitCommandEncoder->copyFromTexture(mtlSrc, srcBaseLayer + i, srcMip, MTL::Origin(effectiveSrcX, effectiveSrcY, srcOffsetZ), MTL::Size(effectiveCopyWidth, effectiveCopyHeight, srcDepth), mtlDst, dstBaseLayer + i, dstMip, MTL::Origin(effectiveDstX, effectiveDstY, dstOffsetZ)); } } else { for (uint32 i = 0; i < std::max(srcLayerCount, dstLayerCount); i++) { if (srcLayerCount == 1) srcOffsetZ++; else srcSlice++; if (dstLayerCount == 1) dstOffsetZ++; else dstSlice++; blitCommandEncoder->copyFromTexture(mtlSrc, srcBaseLayer, srcMip, MTL::Origin(effectiveSrcX, effectiveSrcY, srcOffsetZ), MTL::Size(effectiveCopyWidth, effectiveCopyHeight, 1), mtlDst, dstBaseLayer, dstMip, MTL::Origin(effectiveDstX, effectiveDstY, dstOffsetZ)); } } } } LatteTextureReadbackInfo* MetalRenderer::texture_createReadback(LatteTextureView* textureView) { size_t uploadSize = static_cast(textureView->baseTexture)->GetTexture()->allocatedSize(); if 
((m_readbackBufferWriteOffset + uploadSize) > TEXTURE_READBACK_SIZE) { m_readbackBufferWriteOffset = 0; } auto* result = new LatteTextureReadbackInfoMtl(this, textureView, m_readbackBufferWriteOffset); m_readbackBufferWriteOffset += uploadSize; return result; } void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* sourceTexture, sint32 srcMip, sint32 srcSlice, LatteTexture* destinationTexture, sint32 dstMip, sint32 dstSlice, sint32 width, sint32 height) { GetCommandBuffer(); // scale copy size to effective size sint32 effectiveCopyWidth = width; sint32 effectiveCopyHeight = height; LatteTexture_scaleToEffectiveSize(sourceTexture, &effectiveCopyWidth, &effectiveCopyHeight, 0); sint32 sourceEffectiveWidth, sourceEffectiveHeight; sourceTexture->GetEffectiveSize(sourceEffectiveWidth, sourceEffectiveHeight, srcMip); sint32 texSrcMip = srcMip; sint32 texSrcSlice = srcSlice; sint32 texDstMip = dstMip; sint32 texDstSlice = dstSlice; LatteTextureMtl* srcTextureMtl = static_cast(sourceTexture); LatteTextureMtl* dstTextureMtl = static_cast(destinationTexture); // check if texture rescale ratios match // todo - if not, we have to use drawcall based copying if (!LatteTexture_doesEffectiveRescaleRatioMatch(srcTextureMtl, texSrcMip, dstTextureMtl, texDstMip)) { cemuLog_logDebug(LogType::Force, "surfaceCopy_copySurfaceWithFormatConversion(): Mismatching dimensions"); return; } // check if bpp size matches if (srcTextureMtl->GetBPP() != dstTextureMtl->GetBPP()) { cemuLog_logDebug(LogType::Force, "surfaceCopy_copySurfaceWithFormatConversion(): Mismatching BPP"); return; } MTL::Texture* textures[] = {srcTextureMtl->GetTexture(), dstTextureMtl->GetTexture()}; struct CopyParams { uint32 width; uint32 srcMip; uint32 srcSlice; uint32 dstMip; uint32 dstSlice; } params{(uint32)effectiveCopyWidth, (uint32)texSrcMip, (uint32)texSrcSlice, (uint32)texDstMip, (uint32)texDstSlice}; if (m_encoderType == MetalEncoderType::Render) { auto renderCommandEncoder = 
static_cast(m_commandEncoder); renderCommandEncoder->setRenderPipelineState(m_copyTextureToTexturePipeline->GetRenderPipelineState()); m_state.m_encoderState.m_renderPipelineState = m_copyTextureToTexturePipeline->GetRenderPipelineState(); renderCommandEncoder->setVertexTextures(textures, NS::Range(GET_HELPER_TEXTURE_BINDING(0), 2)); m_state.m_encoderState.m_textures[METAL_SHADER_TYPE_VERTEX][GET_HELPER_TEXTURE_BINDING(0)] = {(LatteTextureViewMtl*)textures[0]}; m_state.m_encoderState.m_textures[METAL_SHADER_TYPE_VERTEX][GET_HELPER_TEXTURE_BINDING(1)] = {(LatteTextureViewMtl*)textures[1]}; renderCommandEncoder->setVertexBytes(¶ms, sizeof(params), GET_HELPER_BUFFER_BINDING(0)); m_state.m_encoderState.m_uniformBufferOffsets[METAL_SHADER_TYPE_VERTEX][GET_HELPER_BUFFER_BINDING(0)] = INVALID_OFFSET; renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); } else { // TODO: do the copy in a compute shader debug_printf("surfaceCopy_copySurfaceWithFormatConversion: no active render command encoder, skipping copy\n"); } // TODO: restore state } void MetalRenderer::bufferCache_init(const sint32 bufferSize) { m_memoryManager->InitBufferCache(bufferSize); } void MetalRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset) { m_memoryManager->UploadToBufferCache(buffer, bufferOffset, size); } void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) { m_memoryManager->CopyBufferCache(srcOffset, dstOffset, size); } void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) { debug_printf("MetalRenderer::bufferCache_copyStreamoutToMainBuffer not implemented\n"); } void MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) { cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS); auto& buffer = m_state.m_vertexBuffers[bufferIndex]; if (buffer.offset == offset && buffer.size == size) return; if (buffer.offset 
!= INVALID_OFFSET) { m_memoryManager->UntrackVertexBuffer(bufferIndex); } buffer.needsRebind = true; buffer.offset = offset; buffer.size = size; buffer.restrideInfo = {}; m_memoryManager->TrackVertexBuffer(bufferIndex, offset, size, &buffer.restrideInfo); } void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) { m_state.m_uniformBufferOffsets[GetMtlShaderType(shaderType)][bufferIndex] = offset; } RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) { return new RendererShaderMtl(this, type, baseHash, auxHash, isGameShader, isGfxPackShader, source); } void MetalRenderer::streamout_setupXfbBuffer(uint32 bufferIndex, sint32 ringBufferOffset, uint32 rangeAddr, uint32 rangeSize) { debug_printf("MetalRenderer::streamout_setupXfbBuffer not implemented\n"); } void MetalRenderer::streamout_begin() { debug_printf("MetalRenderer::streamout_begin not implemented\n"); } void MetalRenderer::streamout_rendererFinishDrawcall() { debug_printf("MetalRenderer::streamout_rendererFinishDrawcall not implemented\n"); } void MetalRenderer::draw_beginSequence() { m_state.m_skipDrawSequence = false; // update shader state LatteSHRC_UpdateActiveShaders(); if (LatteGPUState.activeShaderHasError) { debug_printf("Skipping drawcalls due to shader error\n"); m_state.m_skipDrawSequence = true; cemu_assert_debug(false); return; } // update render target and texture state LatteGPUState.requiresTextureBarrier = false; while (true) { LatteGPUState.repeatTextureInitialization = false; if (!LatteMRT::UpdateCurrentFBO()) { debug_printf("Rendertarget invalid\n"); m_state.m_skipDrawSequence = true; return; // no render target } if (!hasValidFramebufferAttached) { debug_printf("Drawcall with no color buffer or depth buffer attached\n"); m_state.m_skipDrawSequence = true; return; // no render target } 
LatteTexture_updateTextures(); if (!LatteGPUState.repeatTextureInitialization) break; } // apply render target LatteMRT::ApplyCurrentState(); // viewport and scissor box LatteRenderTarget_updateViewport(); LatteRenderTarget_updateScissorBox(); // check for conditions which would turn the drawcalls into no-ops bool rasterizerEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL() == false; // GX2SetSpecialState(0, true) enables DX_RASTERIZATION_KILL, but still expects depth writes to happen? -> Research which stages are disabled by DX_RASTERIZATION_KILL exactly // for now we use a workaround: if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) rasterizerEnable = true; if (!rasterizerEnable == false) m_state.m_skipDrawSequence = true; // TODO: is this even needed? if (!m_state.m_activeFBO) m_state.m_skipDrawSequence = true; } void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) { // TODO: uncomment //if (m_state.m_skipDrawSequence) //{ // LatteGPUState.drawCallCounter++; // return; //} auto& encoderState = m_state.m_encoderState; // Render pass auto renderCommandEncoder = GetRenderCommandEncoder(); // Shaders LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); if (!vertexShader || !static_cast(vertexShader->shader)->GetFunction()) { debug_printf("no vertex function, skipping draw\n"); return; } const auto fetchShader = LatteSHRC_GetActiveFetchShader(); // Depth stencil state MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew); if (depthStencilState != encoderState.m_depthStencilState) { renderCommandEncoder->setDepthStencilState(depthStencilState); encoderState.m_depthStencilState = depthStencilState; } // Stencil reference bool 
stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE(); if (stencilEnable) { bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE(); uint32 stencilRefFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILREF_F(); uint32 stencilRefBack; if (backStencilEnable) stencilRefBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILREF_B(); else stencilRefBack = stencilRefFront; if (stencilRefFront != encoderState.m_stencilRefFront || stencilRefBack != encoderState.m_stencilRefBack) { renderCommandEncoder->setStencilReferenceValues(stencilRefFront, stencilRefBack); encoderState.m_stencilRefFront = stencilRefFront; encoderState.m_stencilRefBack = stencilRefBack; } } // Primitive type const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); // Blend color float* blendColorConstant = (float*)LatteGPUState.contextRegister + Latte::REGADDR::CB_BLEND_RED; renderCommandEncoder->setBlendColor(blendColorConstant[0], blendColorConstant[1], blendColorConstant[2], blendColorConstant[3]); // polygon control const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; const auto frontFace = polygonControlReg.get_FRONT_FACE(); uint32 cullFront = polygonControlReg.get_CULL_FRONT(); uint32 cullBack = polygonControlReg.get_CULL_BACK(); uint32 polyOffsetFrontEnable = polygonControlReg.get_OFFSET_FRONT_ENABLED(); // TODO //cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually //bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false; if (polyOffsetFrontEnable) { uint32 frontScaleU32 = 
LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.getRawValue(); uint32 frontOffsetU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_OFFSET.getRawValue(); uint32 offsetClampU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_CLAMP.getRawValue(); if (frontOffsetU32 != encoderState.m_depthBias || frontScaleU32 != encoderState.m_depthSlope || offsetClampU32 != encoderState.m_depthClamp) { float frontScale = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.get_SCALE(); float frontOffset = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_OFFSET.get_OFFSET(); float offsetClamp = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_CLAMP.get_CLAMP(); frontScale /= 16.0f; renderCommandEncoder->setDepthBias(frontOffset, frontScale, offsetClamp); encoderState.m_depthBias = frontOffsetU32; encoderState.m_depthSlope = frontScaleU32; encoderState.m_depthClamp = offsetClampU32; } } else { if (0 != encoderState.m_depthBias || 0 != encoderState.m_depthSlope || 0 != encoderState.m_depthClamp) { renderCommandEncoder->setDepthBias(0.0f, 0.0f, 0.0f); encoderState.m_depthBias = 0; encoderState.m_depthSlope = 0; encoderState.m_depthClamp = 0; } } // todo - how does culling behave with rects? // right now we just assume that their winding is always CW if (isPrimitiveRect) { if (frontFace == Latte::LATTE_PA_SU_SC_MODE_CNTL::E_FRONTFACE::CW) cullFront = cullBack; else cullBack = cullFront; } // Cull mode if (cullFront && cullBack) return; // We can just skip the draw (TODO: can we?) 
MTL::CullMode cullMode; if (cullFront) cullMode = MTL::CullModeFront; else if (cullBack) cullMode = MTL::CullModeBack; else cullMode = MTL::CullModeNone; if (cullMode != encoderState.m_cullMode) { renderCommandEncoder->setCullMode(cullMode); encoderState.m_cullMode = cullMode; } // Front face MTL::Winding frontFaceWinding; if (frontFace == Latte::LATTE_PA_SU_SC_MODE_CNTL::E_FRONTFACE::CCW) frontFaceWinding = MTL::WindingCounterClockwise; else frontFaceWinding = MTL::WindingClockwise; if (frontFaceWinding != encoderState.m_frontFaceWinding) { renderCommandEncoder->setFrontFacingWinding(frontFaceWinding); encoderState.m_frontFaceWinding = frontFaceWinding; } // Viewport if (m_state.m_viewport.originX != encoderState.m_viewport.originX || m_state.m_viewport.originY != encoderState.m_viewport.originY || m_state.m_viewport.width != encoderState.m_viewport.width || m_state.m_viewport.height != encoderState.m_viewport.height || m_state.m_viewport.znear != encoderState.m_viewport.znear || m_state.m_viewport.zfar != encoderState.m_viewport.zfar) { renderCommandEncoder->setViewport(m_state.m_viewport); encoderState.m_viewport = m_state.m_viewport; } // Scissor if (m_state.m_scissor.x != encoderState.m_scissor.x || m_state.m_scissor.y != encoderState.m_scissor.y || m_state.m_scissor.width != encoderState.m_scissor.width || m_state.m_scissor.height != encoderState.m_scissor.height) { encoderState.m_scissor = m_state.m_scissor; // TODO: clamp scissor to render target dimensions //scissor.width = ; //scissor.height = ; renderCommandEncoder->setScissorRect(encoderState.m_scissor); } // Resources // Index buffer Renderer::INDEX_TYPE hostIndexType; uint32 hostIndexCount; uint32 indexMin = 0; uint32 indexMax = 0; uint32 indexBufferOffset = 0; uint32 indexBufferIndex = 0; LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); // synchronize vertex 
and uniform cache and update buffer bindings LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); // Vertex buffers for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) { auto& vertexBufferRange = m_state.m_vertexBuffers[i]; if (vertexBufferRange.offset != INVALID_OFFSET) { // Restride uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7; uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride); // Bind if (vertexBufferRange.needsRebind) { renderCommandEncoder->setVertexBuffer(restridedBuffer.buffer, restridedBuffer.offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); vertexBufferRange.needsRebind = false; } } } // Render pipeline state MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew); if (renderPipelineState != encoderState.m_renderPipelineState) { renderCommandEncoder->setRenderPipelineState(renderPipelineState); encoderState.m_renderPipelineState = renderPipelineState; } // Uniform buffers, textures and samplers BindStageResources(renderCommandEncoder, vertexShader); BindStageResources(renderCommandEncoder, pixelShader); // Draw if (hostIndexType != INDEX_TYPE::NONE) { auto mtlIndexType = GetMtlIndexType(hostIndexType); MTL::Buffer* indexBuffer = m_memoryManager->GetBuffer(indexBufferIndex); renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); } else { renderCommandEncoder->drawPrimitives(mtlPrimitiveType, baseVertex, count, instanceCount, baseInstance); } LatteGPUState.drawCallCounter++; } void MetalRenderer::draw_endSequence() { LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); // post-drawcall logic if (pixelShader) 
LatteRenderTarget_trackUpdates(); bool hasReadback = LatteTextureReadback_Update(); m_recordedDrawcalls++; // The number of draw calls needs to twice as big, since we are interrupting the render pass if (m_recordedDrawcalls >= COMMIT_TRESHOLD * 2 || hasReadback) { CommitCommandBuffer(); // TODO: where should this be called? LatteTextureReadback_UpdateFinishedTransfers(false); } } void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) { auto allocation = m_memoryManager->GetBufferAllocation(size); offset = allocation.bufferOffset; bufferIndex = allocation.bufferIndex; return allocation.data; } void MetalRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size) { // Do nothing, since the buffer has shared storage mode } MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() { bool needsNewCommandBuffer = (m_commandBuffers.empty() || m_commandBuffers.back().m_commited); if (needsNewCommandBuffer) { // Debug //m_commandQueue->insertDebugCaptureBoundary(); MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer(); m_commandBuffers.push_back({mtlCommandBuffer}); return mtlCommandBuffer; } else { return m_commandBuffers.back().m_commandBuffer; } } bool MetalRenderer::CommandBufferCompleted(MTL::CommandBuffer* commandBuffer) { return commandBuffer->status() == MTL::CommandBufferStatusCompleted; } void MetalRenderer::WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer) { commandBuffer->waitUntilCompleted(); } MTL::RenderCommandEncoder* MetalRenderer::GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor) { EndEncoding(); auto commandBuffer = GetCommandBuffer(); auto renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); #ifdef CEMU_DEBUG_ASSERT renderCommandEncoder->setLabel(GetLabel("Temporary render command encoder", renderCommandEncoder)); #endif m_commandEncoder = renderCommandEncoder; m_encoderType = MetalEncoderType::Render; return 
renderCommandEncoder; } // Some render passes clear the attachments, forceRecreate is supposed to be used in those cases MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecreate, bool rebindStateIfNewEncoder) { // Check if we need to begin a new render pass if (m_commandEncoder) { if (!forceRecreate) { if (m_encoderType == MetalEncoderType::Render) { bool needsNewRenderPass = (m_state.m_lastUsedFBO == nullptr); if (!needsNewRenderPass) { for (uint8 i = 0; i < 8; i++) { if (m_state.m_activeFBO->colorBuffer[i].texture && m_state.m_activeFBO->colorBuffer[i].texture != m_state.m_lastUsedFBO->colorBuffer[i].texture) { needsNewRenderPass = true; break; } } } if (!needsNewRenderPass) { if (m_state.m_activeFBO->depthBuffer.texture && (m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture || ( m_state.m_activeFBO->depthBuffer.hasStencil && !m_state.m_lastUsedFBO->depthBuffer.hasStencil))) { needsNewRenderPass = true; } } if (!needsNewRenderPass) { return (MTL::RenderCommandEncoder*)m_commandEncoder; } } } EndEncoding(); } auto commandBuffer = GetCommandBuffer(); // Update state m_state.m_lastUsedFBO = m_state.m_activeFBO; auto renderCommandEncoder = commandBuffer->renderCommandEncoder(m_state.m_activeFBO->GetRenderPassDescriptor()); #ifdef CEMU_DEBUG_ASSERT renderCommandEncoder->setLabel(GetLabel("Render command encoder", renderCommandEncoder)); #endif m_commandEncoder = renderCommandEncoder; m_encoderType = MetalEncoderType::Render; ResetEncoderState(); if (rebindStateIfNewEncoder) { // Rebind all the render state RebindRenderState(renderCommandEncoder); } return renderCommandEncoder; } MTL::ComputeCommandEncoder* MetalRenderer::GetComputeCommandEncoder() { if (m_commandEncoder) { if (m_encoderType == MetalEncoderType::Compute) { return (MTL::ComputeCommandEncoder*)m_commandEncoder; } EndEncoding(); } auto commandBuffer = GetCommandBuffer(); auto computeCommandEncoder = commandBuffer->computeCommandEncoder(); 
m_commandEncoder = computeCommandEncoder; m_encoderType = MetalEncoderType::Compute; ResetEncoderState(); return computeCommandEncoder; } MTL::BlitCommandEncoder* MetalRenderer::GetBlitCommandEncoder() { if (m_commandEncoder) { if (m_encoderType == MetalEncoderType::Blit) { return (MTL::BlitCommandEncoder*)m_commandEncoder; } EndEncoding(); } auto commandBuffer = GetCommandBuffer(); auto blitCommandEncoder = commandBuffer->blitCommandEncoder(); m_commandEncoder = blitCommandEncoder; m_encoderType = MetalEncoderType::Blit; ResetEncoderState(); return blitCommandEncoder; } void MetalRenderer::EndEncoding() { if (m_commandEncoder) { m_commandEncoder->endEncoding(); m_commandEncoder->release(); m_commandEncoder = nullptr; m_encoderType = MetalEncoderType::None; // Commit the command buffer if enough draw calls have been recorded if (m_recordedDrawcalls >= COMMIT_TRESHOLD) CommitCommandBuffer(); } } void MetalRenderer::CommitCommandBuffer() { m_recordedDrawcalls = 0; if (m_commandBuffers.size() != 0) { EndEncoding(); auto& commandBuffer = m_commandBuffers.back(); if (!commandBuffer.m_commited) { commandBuffer.m_commandBuffer->commit(); commandBuffer.m_commited = true; // Debug //m_commandQueue->insertDebugCaptureBoundary(); } } } bool MetalRenderer::AcquireNextDrawable(bool mainWindow) { const bool latteBufferUsesSRGB = mainWindow ? LatteGPUState.tvBufferUsesSRGB : LatteGPUState.drcBufferUsesSRGB; if (latteBufferUsesSRGB != m_state.m_usesSRGB) { m_metalLayer->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatRGBA8Unorm_sRGB : MTL::PixelFormatRGBA8Unorm); m_state.m_usesSRGB = latteBufferUsesSRGB; } if (m_drawable) { // TODO: should this be true? 
return true; } m_drawable = m_metalLayer->nextDrawable(); if (!m_drawable) { debug_printf("failed to acquire next drawable\n"); return false; } return true; } void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader) { auto mtlShaderType = GetMtlShaderType(shader->shaderType); sint32 textureCount = shader->resourceMapping.getTextureCount(); for (int i = 0; i < textureCount; ++i) { const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i); auto hostTextureUnit = relative_textureUnit; auto textureDim = shader->textureUnitDim[relative_textureUnit]; auto texUnitRegIndex = hostTextureUnit * 7; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: hostTextureUnit += LATTE_CEMU_VS_TEX_UNIT_BASE; texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_VS; break; case LatteConst::ShaderType::Pixel: hostTextureUnit += LATTE_CEMU_PS_TEX_UNIT_BASE; texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_PS; break; case LatteConst::ShaderType::Geometry: hostTextureUnit += LATTE_CEMU_GS_TEX_UNIT_BASE; texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_GS; break; default: UNREACHABLE; } // TODO: uncomment uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit]; if (binding >= MAX_MTL_TEXTURES) { debug_printf("invalid texture binding %u\n", binding); continue; } auto textureView = m_state.m_textures[hostTextureUnit]; if (!textureView) { // TODO: don't bind if already bound if (textureDim == Latte::E_DIM::DIM_1D) { switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexTexture(m_nullTexture1D, binding); renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentTexture(m_nullTexture1D, binding); 
renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); break; } default: UNREACHABLE; } } else { switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexTexture(m_nullTexture2D, binding); renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentTexture(m_nullTexture2D, binding); renderCommandEncoder->setVertexSamplerState(m_nearestSampler, binding); break; } default: UNREACHABLE; } } continue; } LatteTexture* baseTexture = textureView->baseTexture; uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit]; MTL::SamplerState* sampler; if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE) { uint32 samplerIndex = stageSamplerIndex + LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType); sampler = m_samplerCache->GetSamplerState(LatteGPUState.contextNew, samplerIndex); } else { sampler = m_nearestSampler; } auto& boundSampler = m_state.m_encoderState.m_samplers[mtlShaderType][binding]; if (sampler != boundSampler) { boundSampler = sampler; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexSamplerState(sampler, binding); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentSamplerState(sampler, binding); break; } default: UNREACHABLE; } } // get texture register word 0 uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; auto& boundTexture = m_state.m_encoderState.m_textures[mtlShaderType][binding]; if (textureView == boundTexture.m_textureView && word4 == boundTexture.m_word4) continue; boundTexture = {textureView, word4}; MTL::Texture* mtlTexture = textureView->GetSwizzledView(word4); switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexTexture(mtlTexture, binding); break; } case LatteConst::ShaderType::Pixel: { 
renderCommandEncoder->setFragmentTexture(mtlTexture, binding); break; } default: UNREACHABLE; } } // Support buffer auto GET_UNIFORM_DATA_PTR = [&](size_t index) { return supportBufferData + (index / 4); }; sint32 shaderAluConst; sint32 shaderUniformRegisterOffset; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: shaderAluConst = 0x400; shaderUniformRegisterOffset = mmSQ_VTX_UNIFORM_BLOCK_START; break; case LatteConst::ShaderType::Pixel: shaderAluConst = 0; shaderUniformRegisterOffset = mmSQ_PS_UNIFORM_BLOCK_START; break; case LatteConst::ShaderType::Geometry: shaderAluConst = 0; // geometry shader has no ALU const shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START; break; default: UNREACHABLE; } if (shader->resourceMapping.uniformVarsBufferBindingPoint >= 0) { if (shader->uniform.list_ufTexRescale.empty() == false) { for (auto& entry : shader->uniform.list_ufTexRescale) { float* xyScale = LatteTexture_getEffectiveTextureScale(shader->shaderType, entry.texUnit); memcpy(entry.currentValue, xyScale, sizeof(float) * 2); memcpy(GET_UNIFORM_DATA_PTR(entry.uniformLocation), xyScale, sizeof(float) * 2); } } if (shader->uniform.loc_alphaTestRef >= 0) { *GET_UNIFORM_DATA_PTR(shader->uniform.loc_alphaTestRef) = LatteGPUState.contextNew.SX_ALPHA_REF.get_ALPHA_TEST_REF(); } if (shader->uniform.loc_pointSize >= 0) { const auto& pointSizeReg = LatteGPUState.contextNew.PA_SU_POINT_SIZE; float pointWidth = (float)pointSizeReg.get_WIDTH() / 8.0f; if (pointWidth == 0.0f) pointWidth = 1.0f / 8.0f; // minimum size *GET_UNIFORM_DATA_PTR(shader->uniform.loc_pointSize) = pointWidth; } if (shader->uniform.loc_remapped >= 0) { LatteBufferCache_LoadRemappedUniforms(shader, GET_UNIFORM_DATA_PTR(shader->uniform.loc_remapped)); } if (shader->uniform.loc_uniformRegister >= 0) { uint32* uniformRegData = (uint32*)(LatteGPUState.contextRegister + mmSQ_ALU_CONSTANT0_0 + shaderAluConst); memcpy(GET_UNIFORM_DATA_PTR(shader->uniform.loc_uniformRegister), uniformRegData, 
shader->uniform.count_uniformRegister * 16); } if (shader->uniform.loc_windowSpaceToClipSpaceTransform >= 0) { sint32 viewportWidth; sint32 viewportHeight; LatteRenderTarget_GetCurrentVirtualViewportSize(&viewportWidth, &viewportHeight); // always call after _updateViewport() float* v = GET_UNIFORM_DATA_PTR(shader->uniform.loc_windowSpaceToClipSpaceTransform); v[0] = 2.0f / (float)viewportWidth; v[1] = 2.0f / (float)viewportHeight; } if (shader->uniform.loc_fragCoordScale >= 0) { LatteMRT::GetCurrentFragCoordScale(GET_UNIFORM_DATA_PTR(shader->uniform.loc_fragCoordScale)); } // TODO: uncomment /* if (shader->uniform.loc_verticesPerInstance >= 0) { *(int*)(supportBufferData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance; for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++) { if (shader->uniform.loc_streamoutBufferBase[b] >= 0) { *(uint32*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_streamoutBufferBase[b]) = m_streamoutState.buffer[b].ringBufferOffset; } } } */ // TODO: uncomment //auto supportBuffer = m_memoryManager->GetBufferAllocation(sizeof(supportBufferData)); //memcpy(supportBuffer.data, supportBufferData, sizeof(supportBufferData)); switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { //renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING); renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } case LatteConst::ShaderType::Pixel: { //renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBuffer(supportBuffer.bufferIndex), supportBuffer.bufferOffset, MTL_SUPPORT_BUFFER_BINDING); renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } default: UNREACHABLE; } } // Uniform buffers for (sint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) { if 
(shader->resourceMapping.uniformBuffersBindingPoint[i] >= 0) { uint32 binding = shader->resourceMapping.uniformBuffersBindingPoint[i]; if (binding >= MAX_MTL_BUFFERS) { debug_printf("invalid buffer binding%u\n", binding); continue; } size_t offset = m_state.m_uniformBufferOffsets[mtlShaderType][i]; if (offset == INVALID_OFFSET) continue; auto& boundOffset = m_state.m_encoderState.m_uniformBufferOffsets[mtlShaderType][binding]; if (offset == boundOffset) continue; boundOffset = offset; // TODO: only set the offset if already bound switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), offset, binding); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBufferCache(), offset, binding); break; } default: UNREACHABLE; } } } // Storage buffer if (shader->resourceMapping.tfStorageBindingPoint >= 0) { switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint); break; } default: UNREACHABLE; } } } void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder) { // Vertex buffers for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) { auto& vertexBufferRange = m_state.m_vertexBuffers[i]; if (vertexBufferRange.offset != INVALID_OFFSET) vertexBufferRange.needsRebind = true; } } void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a) { MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); auto colorAttachment = renderPassDescriptor->colorAttachments()->object(0); colorAttachment->setTexture(mtlTexture); 
colorAttachment->setClearColor(MTL::ClearColor(r, g, b, a)); colorAttachment->setLoadAction(MTL::LoadActionClear); colorAttachment->setStoreAction(MTL::StoreActionStore); colorAttachment->setSlice(sliceIndex); colorAttachment->setLevel(mipIndex); MTL::Texture* colorRenderTargets[8] = {nullptr}; colorRenderTargets[0] = mtlTexture; GetTemporaryRenderCommandEncoder(renderPassDescriptor); renderPassDescriptor->release(); EndEncoding(); }