#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalLayer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/CachedFBOMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h"

#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Core/LatteIndices.h"
#include "Cemu/Logging/CemuDebugLogging.h"
#include "Common/precompiled.h"
#include "Metal/MTLRenderPass.hpp"
#include "gui/guiWrapper.h"

#include <stdexcept>
#include <string>

// Number of recorded draw calls after which the current command buffer is committed
// (see EndEncoding and draw_endSequence).
#define COMMIT_TRESHOLD 256

extern bool hasValidFramebufferAttached;

// NOTE(review): not referenced by the code visible in this part of the file.
float supportBufferData[512 * 4];

// Creates the Metal device, command queue, samplers, caches, the readback and transform
// feedback buffers, and the utility pipelines (present + hybrid compute pipelines).
// Throws std::runtime_error if the utility shader library cannot be created.
MetalRenderer::MetalRenderer()
{
    m_device = MTL::CreateSystemDefaultDevice();
    m_commandQueue = m_device->newCommandQueue();

    // The default sampler descriptor is used for the nearest sampler; the same descriptor
    // is then switched to linear min/mag filtering for the linear sampler.
    MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init();
    m_nearestSampler = m_device->newSamplerState(samplerDescriptor);

    samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear);
    samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear);
    m_linearSampler = m_device->newSamplerState(samplerDescriptor);
    samplerDescriptor->release();

    m_memoryManager = new MetalMemoryManager(this);
    m_pipelineCache = new MetalPipelineCache(this);
    m_depthStencilCache = new MetalDepthStencilCache(this);

    // Texture readback
    m_readbackBuffer = m_device->newBuffer(TEXTURE_READBACK_SIZE, MTL::StorageModeShared);

    // Transform feedback
    m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize(), MTL::StorageModeShared);

    // Initialize state
    for (uint32 i = 0; i < (uint32)LatteConst::ShaderType::TotalCount; i++)
    {
        for (uint32 j = 0; j < MAX_MTL_BUFFERS; j++)
        {
            m_state.m_uniformBufferOffsets[i][j] = INVALID_OFFSET;
        }
    }

    // Utility shader library

    // Process the source first: drop the first and the last character of the embedded
    // source and prepend the standard library include.
    std::string processedUtilityShaderSource = utilityShaderSource;
    processedUtilityShaderSource.pop_back();
    processedUtilityShaderSource.erase(processedUtilityShaderSource.begin());
    // NOTE(review): the header name was missing from the literal ("#include \n"), which is
    // not valid shader source. <metal_stdlib> is the standard MSL header - confirm.
    processedUtilityShaderSource = "#include <metal_stdlib>\n" + processedUtilityShaderSource;

    // Create the library
    NS::Error* error = nullptr;
    MTL::Library* utilityLibrary = m_device->newLibrary(NS::String::string(processedUtilityShaderSource.c_str(), NS::ASCIIStringEncoding), nullptr, &error);
    if (error)
    {
        debug_printf("failed to create utility library (error: %s)\n", error->localizedDescription()->utf8String());
        error->release();
        // A bare `throw;` without an active exception calls std::terminate, so throw a
        // real exception instead. Everything below depends on the library.
        throw std::runtime_error("MetalRenderer: failed to create utility shader library");
    }

    // Present pipeline
    MTL::Function* presentVertexFunction = utilityLibrary->newFunction(NS::String::string("vertexFullscreen", NS::ASCIIStringEncoding));
    MTL::Function* presentFragmentFunction = utilityLibrary->newFunction(NS::String::string("fragmentPresent", NS::ASCIIStringEncoding));

    MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
    renderPipelineDescriptor->setVertexFunction(presentVertexFunction);
    renderPipelineDescriptor->setFragmentFunction(presentFragmentFunction);
    presentVertexFunction->release();
    presentFragmentFunction->release();

    // Linear (non-sRGB) variant
    error = nullptr;
    renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm);
    m_presentPipelineLinear = m_device->newRenderPipelineState(renderPipelineDescriptor, &error);
    if (error)
    {
        debug_printf("failed to create linear present pipeline (error: %s)\n", error->localizedDescription()->utf8String());
        error->release();
    }

    // sRGB variant, built from the same descriptor
    error = nullptr;
    renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatRGBA8Unorm_sRGB);
    m_presentPipelineSRGB = m_device->newRenderPipelineState(renderPipelineDescriptor, &error);
    renderPipelineDescriptor->release();
    if (error)
    {
        debug_printf("failed to create sRGB present pipeline (error: %s)\n", error->localizedDescription()->utf8String());
        error->release();
    }

    // Hybrid pipelines
    m_copyTextureToTexturePipeline = new MetalHybridComputePipeline(this, utilityLibrary, "vertexCopyTextureToTexture", "kernelCopyTextureToTexture");
    m_restrideBufferPipeline = new MetalHybridComputePipeline(this, utilityLibrary, "vertexRestrideBuffer", "kernelRestrideBuffer");
    utilityLibrary->release();

    m_memoryManager->SetRestrideBufferPipeline(m_restrideBufferPipeline);
}

// Releases everything the constructor created.
MetalRenderer::~MetalRenderer()
{
    delete m_copyTextureToTexturePipeline;
    delete m_restrideBufferPipeline;

    m_presentPipelineLinear->release();
    m_presentPipelineSRGB->release();

    delete m_depthStencilCache;
    delete m_pipelineCache;
    delete m_memoryManager;

    m_nearestSampler->release();
    m_linearSampler->release();

    m_readbackBuffer->release();
    // The transform feedback ring buffer is created in the constructor and was never released.
    m_xfbRingBuffer->release();

    m_commandQueue->release();
    m_device->release();
}

// Creates the Metal layer for the main window and attaches the device to it.
// TODO: don't ignore "mainWindow" argument and respect size
void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
{
    const auto& windowInfo = gui_getWindowInfo().window_main;

    m_metalLayer = (CA::MetalLayer*)CreateMetalLayer(windowInfo.handle);
    m_metalLayer->setDevice(m_device);
}

// Forwards to the base renderer initialization.
void MetalRenderer::Initialize()
{
    Renderer::Initialize();
}

// Shuts down the base renderer, then commits any pending command buffer.
void MetalRenderer::Shutdown()
{
    Renderer::Shutdown();
    CommitCommandBuffer();
}

// TODO: what should this do?
bool MetalRenderer::IsPadWindowActive() { //debug_printf("MetalRenderer::IsPadWindowActive not implemented\n"); return false; } bool MetalRenderer::GetVRAMInfo(int& usageInMB, int& totalInMB) const { debug_printf("MetalRenderer::GetVRAMInfo not implemented\n"); usageInMB = 1024; totalInMB = 1024; return false; } void MetalRenderer::ClearColorbuffer(bool padView) { if (!AcquireNextDrawable(!padView)) return; ClearColorTextureInternal(m_drawable->texture(), 0, 0, 0.0f, 0.0f, 0.0f, 0.0f); } void MetalRenderer::DrawEmptyFrame(bool mainWindow) { if (!BeginFrame(mainWindow)) return; SwapBuffers(mainWindow, !mainWindow); } void MetalRenderer::SwapBuffers(bool swapTV, bool swapDRC) { if (m_drawable) { auto commandBuffer = GetCommandBuffer(); commandBuffer->presentDrawable(m_drawable); } else { debug_printf("skipped present!\n"); } m_drawable = nullptr; // Release all the command buffers CommitCommandBuffer(); for (uint32 i = 0; i < m_commandBuffers.size(); i++) m_commandBuffers[i].m_commandBuffer->release(); m_commandBuffers.clear(); // Reset temporary buffers m_memoryManager->ResetTemporaryBuffers(); } void MetalRenderer::DrawBackbufferQuad(LatteTextureView* texView, RendererOutputShader* shader, bool useLinearTexFilter, sint32 imageX, sint32 imageY, sint32 imageWidth, sint32 imageHeight, bool padView, bool clearBackground) { if (!AcquireNextDrawable(!padView)) return; MTL::Texture* presentTexture = static_cast(texView)->GetRGBAView(); // Create render pass MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); auto colorAttachment = renderPassDescriptor->colorAttachments()->object(0); colorAttachment->setTexture(m_drawable->texture()); // TODO: shouldn't it be LoadActionLoad when not clearing? colorAttachment->setLoadAction(clearBackground ? 
MTL::LoadActionClear : MTL::LoadActionDontCare); colorAttachment->setStoreAction(MTL::StoreActionStore); auto renderCommandEncoder = GetTemporaryRenderCommandEncoder(renderPassDescriptor); renderPassDescriptor->release(); // Draw to Metal layer renderCommandEncoder->setRenderPipelineState(m_state.m_usesSRGB ? m_presentPipelineSRGB : m_presentPipelineLinear); renderCommandEncoder->setFragmentTexture(presentTexture, 0); renderCommandEncoder->setFragmentSamplerState((useLinearTexFilter ? m_linearSampler : m_nearestSampler), 0); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); EndEncoding(); } bool MetalRenderer::BeginFrame(bool mainWindow) { return AcquireNextDrawable(mainWindow); } void MetalRenderer::Flush(bool waitIdle) { // TODO: commit if commit on idle is requested if (m_recordedDrawcalls > 0) CommitCommandBuffer(); if (waitIdle) { // TODO: shouldn't we wait for all command buffers? WaitForCommandBufferCompletion(GetCurrentCommandBuffer()); } } void MetalRenderer::NotifyLatteCommandProcessorIdle() { // TODO: commit if commit on idle is requested //CommitCommandBuffer(); } void MetalRenderer::AppendOverlayDebugInfo() { debug_printf("MetalRenderer::AppendOverlayDebugInfo not implemented\n"); } void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ) { m_state.m_viewport = MTL::Viewport{x, y, width, height, nearZ, farZ}; if (m_encoderType == MetalEncoderType::Render) { static_cast(m_commandEncoder)->setViewport(m_state.m_viewport); } } void MetalRenderer::renderTarget_setScissor(sint32 scissorX, sint32 scissorY, sint32 scissorWidth, sint32 scissorHeight) { m_state.m_scissor = MTL::ScissorRect{NS::UInteger(scissorX), NS::UInteger(scissorY), NS::UInteger(scissorWidth), NS::UInteger(scissorHeight)}; if (m_encoderType == MetalEncoderType::Render) { static_cast(m_commandEncoder)->setScissorRect(m_state.m_scissor); } } LatteCachedFBO* 
MetalRenderer::rendertarget_createCachedFBO(uint64 key) { return new CachedFBOMtl(key); } void MetalRenderer::rendertarget_deleteCachedFBO(LatteCachedFBO* cfbo) { if (cfbo == (LatteCachedFBO*)m_state.m_activeFBO) m_state.m_activeFBO = nullptr; } void MetalRenderer::rendertarget_bindFramebufferObject(LatteCachedFBO* cfbo) { m_state.m_activeFBO = (CachedFBOMtl*)cfbo; } void* MetalRenderer::texture_acquireTextureUploadBuffer(uint32 size) { return m_memoryManager->GetTextureUploadBuffer(size); } void MetalRenderer::texture_releaseTextureUploadBuffer(uint8* mem) { // TODO: should the texture buffer get released? } TextureDecoder* MetalRenderer::texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) { return GetMtlTextureDecoder(format, isDepth); } void MetalRenderer::texture_clearSlice(LatteTexture* hostTexture, sint32 sliceIndex, sint32 mipIndex) { if (hostTexture->isDepth) { texture_clearDepthSlice(hostTexture, sliceIndex, mipIndex, true, hostTexture->hasStencil, 0.0f, 0); } else { texture_clearColorSlice(hostTexture, sliceIndex, mipIndex, 0.0f, 0.0f, 0.0f, 0.0f); } } void MetalRenderer::texture_loadSlice(LatteTexture* hostTexture, sint32 width, sint32 height, sint32 depth, void* pixelData, sint32 sliceIndex, sint32 mipIndex, uint32 compressedImageSize) { auto textureMtl = (LatteTextureMtl*)hostTexture; uint32 offsetZ = 0; if (textureMtl->Is3DTexture()) { offsetZ = sliceIndex; sliceIndex = 0; } size_t bytesPerRow = GetMtlTextureBytesPerRow(textureMtl->GetFormat(), textureMtl->IsDepth(), width); // No need to calculate bytesPerImage for 3D textures, since we always load just one slice //size_t bytesPerImage = GetMtlTextureBytesPerImage(textureMtl->GetFormat(), textureMtl->IsDepth(), height, bytesPerRow); textureMtl->GetTexture()->replaceRegion(MTL::Region(0, 0, offsetZ, width, height, 1), mipIndex, sliceIndex, pixelData, bytesPerRow, 0); } void MetalRenderer::texture_clearColorSlice(LatteTexture* 
hostTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a) { auto mtlTexture = static_cast(hostTexture)->GetTexture(); ClearColorTextureInternal(mtlTexture, sliceIndex, mipIndex, r, g, b, a); } void MetalRenderer::texture_clearDepthSlice(LatteTexture* hostTexture, uint32 sliceIndex, sint32 mipIndex, bool clearDepth, bool clearStencil, float depthValue, uint32 stencilValue) { auto mtlTexture = static_cast(hostTexture)->GetTexture(); MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); if (clearDepth) { auto depthAttachment = renderPassDescriptor->depthAttachment(); depthAttachment->setTexture(mtlTexture); depthAttachment->setClearDepth(depthValue); depthAttachment->setLoadAction(MTL::LoadActionClear); depthAttachment->setStoreAction(MTL::StoreActionStore); depthAttachment->setSlice(sliceIndex); depthAttachment->setLevel(mipIndex); } if (clearStencil) { auto stencilAttachment = renderPassDescriptor->stencilAttachment(); stencilAttachment->setTexture(mtlTexture); stencilAttachment->setClearStencil(stencilValue); stencilAttachment->setLoadAction(MTL::LoadActionClear); stencilAttachment->setStoreAction(MTL::StoreActionStore); stencilAttachment->setSlice(sliceIndex); stencilAttachment->setLevel(mipIndex); } GetTemporaryRenderCommandEncoder(renderPassDescriptor); renderPassDescriptor->release(); EndEncoding(); } LatteTexture* MetalRenderer::texture_createTextureEx(Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth) { return new LatteTextureMtl(this, dim, physAddress, physMipAddress, format, width, height, depth, pitch, mipLevels, swizzle, tileMode, isDepth); } void MetalRenderer::texture_setLatteTexture(LatteTextureView* textureView, uint32 textureUnit) { m_state.m_textures[textureUnit] = static_cast(textureView); } void 
MetalRenderer::texture_copyImageSubData(LatteTexture* src, sint32 srcMip, sint32 effectiveSrcX, sint32 effectiveSrcY, sint32 srcSlice, LatteTexture* dst, sint32 dstMip, sint32 effectiveDstX, sint32 effectiveDstY, sint32 dstSlice, sint32 effectiveCopyWidth, sint32 effectiveCopyHeight, sint32 srcDepth_) { auto blitCommandEncoder = GetBlitCommandEncoder(); auto mtlSrc = static_cast(src)->GetTexture(); auto mtlDst = static_cast(dst)->GetTexture(); uint32 srcBaseLayer = 0; uint32 dstBaseLayer = 0; uint32 srcOffsetZ = 0; uint32 dstOffsetZ = 0; uint32 srcLayerCount = 1; uint32 dstLayerCount = 1; uint32 srcDepth = 1; uint32 dstDepth = 1; if (src->Is3DTexture()) { srcOffsetZ = srcSlice; srcDepth = srcDepth_; } else { srcBaseLayer = srcSlice; srcLayerCount = srcDepth_; } if (dst->Is3DTexture()) { dstOffsetZ = dstSlice; dstDepth = srcDepth_; } else { dstBaseLayer = dstSlice; dstLayerCount = srcDepth_; } // If copying whole textures, we can do a more efficient copy if (effectiveSrcX == 0 && effectiveSrcY == 0 && effectiveDstX == 0 && effectiveDstY == 0 && srcOffsetZ == 0 && dstOffsetZ == 0 && effectiveCopyWidth == src->GetMipWidth(srcMip) && effectiveCopyHeight == src->GetMipHeight(srcMip) && srcDepth == src->GetMipDepth(srcMip) && effectiveCopyWidth == dst->GetMipWidth(dstMip) && effectiveCopyHeight == dst->GetMipHeight(dstMip) && dstDepth == dst->GetMipDepth(dstMip) && srcLayerCount == dstLayerCount) { blitCommandEncoder->copyFromTexture(mtlSrc, srcBaseLayer, srcMip, mtlDst, dstBaseLayer, dstMip, srcLayerCount, 1); } else { if (srcLayerCount == dstLayerCount) { for (uint32 i = 0; i < srcLayerCount; i++) { blitCommandEncoder->copyFromTexture(mtlSrc, srcBaseLayer + i, srcMip, MTL::Origin(effectiveSrcX, effectiveSrcY, srcOffsetZ), MTL::Size(effectiveCopyWidth, effectiveCopyHeight, srcDepth), mtlDst, dstBaseLayer + i, dstMip, MTL::Origin(effectiveDstX, effectiveDstY, dstOffsetZ)); } } else { for (uint32 i = 0; i < std::max(srcLayerCount, dstLayerCount); i++) { if (srcLayerCount 
== 1) srcOffsetZ++; else srcSlice++; if (dstLayerCount == 1) dstOffsetZ++; else dstSlice++; blitCommandEncoder->copyFromTexture(mtlSrc, srcBaseLayer, srcMip, MTL::Origin(effectiveSrcX, effectiveSrcY, srcOffsetZ), MTL::Size(effectiveCopyWidth, effectiveCopyHeight, 1), mtlDst, dstBaseLayer, dstMip, MTL::Origin(effectiveDstX, effectiveDstY, dstOffsetZ)); } } } } LatteTextureReadbackInfo* MetalRenderer::texture_createReadback(LatteTextureView* textureView) { size_t uploadSize = static_cast(textureView->baseTexture)->GetTexture()->allocatedSize(); if ((m_readbackBufferWriteOffset + uploadSize) > TEXTURE_READBACK_SIZE) { m_readbackBufferWriteOffset = 0; } auto* result = new LatteTextureReadbackInfoMtl(this, textureView, m_readbackBufferWriteOffset); m_readbackBufferWriteOffset += uploadSize; return result; } void MetalRenderer::surfaceCopy_copySurfaceWithFormatConversion(LatteTexture* sourceTexture, sint32 srcMip, sint32 srcSlice, LatteTexture* destinationTexture, sint32 dstMip, sint32 dstSlice, sint32 width, sint32 height) { GetCommandBuffer(); // scale copy size to effective size sint32 effectiveCopyWidth = width; sint32 effectiveCopyHeight = height; LatteTexture_scaleToEffectiveSize(sourceTexture, &effectiveCopyWidth, &effectiveCopyHeight, 0); sint32 sourceEffectiveWidth, sourceEffectiveHeight; sourceTexture->GetEffectiveSize(sourceEffectiveWidth, sourceEffectiveHeight, srcMip); sint32 texSrcMip = srcMip; sint32 texSrcSlice = srcSlice; sint32 texDstMip = dstMip; sint32 texDstSlice = dstSlice; LatteTextureMtl* srcTextureMtl = static_cast(sourceTexture); LatteTextureMtl* dstTextureMtl = static_cast(destinationTexture); // check if texture rescale ratios match // todo - if not, we have to use drawcall based copying if (!LatteTexture_doesEffectiveRescaleRatioMatch(srcTextureMtl, texSrcMip, dstTextureMtl, texDstMip)) { cemuLog_logDebug(LogType::Force, "surfaceCopy_copySurfaceWithFormatConversion(): Mismatching dimensions"); return; } // check if bpp size matches if 
(srcTextureMtl->GetBPP() != dstTextureMtl->GetBPP()) { cemuLog_logDebug(LogType::Force, "surfaceCopy_copySurfaceWithFormatConversion(): Mismatching BPP"); return; } MTL::Texture* textures[] = {srcTextureMtl->GetTexture(), dstTextureMtl->GetTexture()}; struct CopyParams { uint32 width; uint32 srcMip; uint32 srcSlice; uint32 dstMip; uint32 dstSlice; } params{(uint32)effectiveCopyWidth, (uint32)texSrcMip, (uint32)texSrcSlice, (uint32)texDstMip, (uint32)texDstSlice}; if (m_encoderType == MetalEncoderType::Render) { auto renderCommandEncoder = static_cast(m_commandEncoder); renderCommandEncoder->setRenderPipelineState(m_copyTextureToTexturePipeline->GetRenderPipelineState()); renderCommandEncoder->setViewport(MTL::Viewport{0.0, 0.0, (double)effectiveCopyWidth, (double)effectiveCopyHeight, 0.0, 1.0}); renderCommandEncoder->setScissorRect(MTL::ScissorRect{0, 0, (uint32)effectiveCopyWidth, (uint32)effectiveCopyHeight}); renderCommandEncoder->setVertexTextures(textures, NS::Range(0, 2)); renderCommandEncoder->setVertexBytes(¶ms, sizeof(params), 0); renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), NS::UInteger(3)); } else { // TODO: do the copy in a compute shader debug_printf("surfaceCopy_copySurfaceWithFormatConversion: no active render command encoder, skipping copy\n"); } // TODO: restore state } void MetalRenderer::bufferCache_init(const sint32 bufferSize) { m_memoryManager->InitBufferCache(bufferSize); } void MetalRenderer::bufferCache_upload(uint8* buffer, sint32 size, uint32 bufferOffset) { m_memoryManager->UploadToBufferCache(buffer, bufferOffset, size); } void MetalRenderer::bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) { m_memoryManager->CopyBufferCache(srcOffset, dstOffset, size); } void MetalRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uint32 dstOffset, uint32 size) { debug_printf("MetalRenderer::bufferCache_copyStreamoutToMainBuffer not implemented\n"); } void 
MetalRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset, uint32 size) { cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS); auto& buffer = m_state.m_vertexBuffers[bufferIndex]; if (buffer.offset == offset && buffer.size == size) return; if (buffer.offset != INVALID_OFFSET) { m_memoryManager->UntrackVertexBuffer(bufferIndex); } buffer.needsRebind = true; buffer.offset = offset; buffer.size = size; buffer.restrideInfo = {}; m_memoryManager->TrackVertexBuffer(bufferIndex, offset, size, &buffer.restrideInfo); } void MetalRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) { m_state.m_uniformBufferOffsets[(uint32)shaderType][bufferIndex] = offset; } RendererShader* MetalRenderer::shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) { return new RendererShaderMtl(this, type, baseHash, auxHash, isGameShader, isGfxPackShader, source); } void MetalRenderer::streamout_setupXfbBuffer(uint32 bufferIndex, sint32 ringBufferOffset, uint32 rangeAddr, uint32 rangeSize) { debug_printf("MetalRenderer::streamout_setupXfbBuffer not implemented\n"); } void MetalRenderer::streamout_begin() { debug_printf("MetalRenderer::streamout_begin not implemented\n"); } void MetalRenderer::streamout_rendererFinishDrawcall() { debug_printf("MetalRenderer::streamout_rendererFinishDrawcall not implemented\n"); } void MetalRenderer::draw_beginSequence() { m_state.m_skipDrawSequence = false; // update shader state LatteSHRC_UpdateActiveShaders(); if (LatteGPUState.activeShaderHasError) { debug_printf("Skipping drawcalls due to shader error\n"); m_state.m_skipDrawSequence = true; cemu_assert_debug(false); return; } // update render target and texture state LatteGPUState.requiresTextureBarrier = false; while (true) { LatteGPUState.repeatTextureInitialization = false; if (!LatteMRT::UpdateCurrentFBO()) { 
debug_printf("Rendertarget invalid\n"); m_state.m_skipDrawSequence = true; return; // no render target } if (!hasValidFramebufferAttached) { debug_printf("Drawcall with no color buffer or depth buffer attached\n"); m_state.m_skipDrawSequence = true; return; // no render target } LatteTexture_updateTextures(); if (!LatteGPUState.repeatTextureInitialization) break; } // apply render target LatteMRT::ApplyCurrentState(); // viewport and scissor box LatteRenderTarget_updateViewport(); LatteRenderTarget_updateScissorBox(); // check for conditions which would turn the drawcalls into no-ops bool rasterizerEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL() == false; // GX2SetSpecialState(0, true) enables DX_RASTERIZATION_KILL, but still expects depth writes to happen? -> Research which stages are disabled by DX_RASTERIZATION_KILL exactly // for now we use a workaround: if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) rasterizerEnable = true; if (!rasterizerEnable == false) m_state.m_skipDrawSequence = true; // TODO: is this even needed? 
if (!m_state.m_activeFBO) m_state.m_skipDrawSequence = true; } void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 instanceCount, uint32 count, MPTR indexDataMPTR, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE indexType, bool isFirst) { // TODO: uncomment //if (m_state.m_skipDrawSequence) //{ // LatteGPUState.drawCallCounter++; // return; //} // Render pass auto renderCommandEncoder = GetRenderCommandEncoder(); // Shaders LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader(); LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); if (!vertexShader || !static_cast(vertexShader->shader)->GetFunction()) { debug_printf("no vertex function, skipping draw\n"); return; } const auto fetchShader = LatteSHRC_GetActiveFetchShader(); // Depth stencil state MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew); renderCommandEncoder->setDepthStencilState(depthStencilState); // Stencil reference bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE(); if (stencilEnable) { bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE(); uint32 stencilRefFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILREF_F(); uint32 stencilRefBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILREF_B(); if (backStencilEnable) renderCommandEncoder->setStencilReferenceValues(stencilRefFront, stencilRefBack); else renderCommandEncoder->setStencilReferenceValue(stencilRefFront); } // Primitive type const LattePrimitiveMode primitiveMode = static_cast(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]); auto mtlPrimitiveType = GetMtlPrimitiveType(primitiveMode); bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); // Blend color float* blendColorConstant = (float*)LatteGPUState.contextRegister + Latte::REGADDR::CB_BLEND_RED; 
renderCommandEncoder->setBlendColor(blendColorConstant[0], blendColorConstant[1], blendColorConstant[2], blendColorConstant[3]); // polygon control const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; const auto frontFace = polygonControlReg.get_FRONT_FACE(); uint32 cullFront = polygonControlReg.get_CULL_FRONT(); uint32 cullBack = polygonControlReg.get_CULL_BACK(); uint32 polyOffsetFrontEnable = polygonControlReg.get_OFFSET_FRONT_ENABLED(); // TODO //cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually //bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false; if (polyOffsetFrontEnable) { //uint32 frontScaleU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.getRawValue(); //uint32 frontOffsetU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_OFFSET.getRawValue(); //uint32 offsetClampU32 = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_CLAMP.getRawValue(); float frontScale = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_SCALE.get_SCALE(); float frontOffset = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_FRONT_OFFSET.get_OFFSET(); float offsetClamp = LatteGPUState.contextNew.PA_SU_POLY_OFFSET_CLAMP.get_CLAMP(); frontScale /= 16.0f; renderCommandEncoder->setDepthBias(frontOffset, frontScale, offsetClamp); } else { renderCommandEncoder->setDepthBias(0.0f, 0.0f, 0.0f); } // todo - how does culling behave with rects? // right now we just assume that their winding is always CW if (isPrimitiveRect) { if (frontFace == Latte::LATTE_PA_SU_SC_MODE_CNTL::E_FRONTFACE::CW) cullFront = cullBack; else cullBack = cullFront; } if (cullFront && cullBack) return; // We can just skip the draw (TODO: can we?) 
else if (cullFront) renderCommandEncoder->setCullMode(MTL::CullModeFront); else if (cullBack) renderCommandEncoder->setCullMode(MTL::CullModeBack); else renderCommandEncoder->setCullMode(MTL::CullModeNone); if (frontFace == Latte::LATTE_PA_SU_SC_MODE_CNTL::E_FRONTFACE::CCW) renderCommandEncoder->setFrontFacingWinding(MTL::WindingCounterClockwise); else renderCommandEncoder->setFrontFacingWinding(MTL::WindingClockwise); // Resources // Index buffer Renderer::INDEX_TYPE hostIndexType; uint32 hostIndexCount; uint32 indexMin = 0; uint32 indexMax = 0; uint32 indexBufferOffset = 0; uint32 indexBufferIndex = 0; LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); // synchronize vertex and uniform cache and update buffer bindings LatteBufferCache_Sync(indexMin + baseVertex, indexMax + baseVertex, baseInstance, instanceCount); // Vertex buffers for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) { auto& vertexBufferRange = m_state.m_vertexBuffers[i]; if (vertexBufferRange.offset != INVALID_OFFSET) { // Restride uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7; uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride); // Bind if (vertexBufferRange.needsRebind) { renderCommandEncoder->setVertexBuffer(restridedBuffer.buffer, restridedBuffer.offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); vertexBufferRange.needsRebind = false; } } } // Render pipeline state MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_lastUsedFBO, LatteGPUState.contextNew); renderCommandEncoder->setRenderPipelineState(renderPipelineState); // Uniform buffers, textures and samplers BindStageResources(renderCommandEncoder, vertexShader); 
BindStageResources(renderCommandEncoder, pixelShader); // Draw if (hostIndexType != INDEX_TYPE::NONE) { auto mtlIndexType = GetMtlIndexType(hostIndexType); MTL::Buffer* indexBuffer = m_memoryManager->GetBuffer(indexBufferIndex); renderCommandEncoder->drawIndexedPrimitives(mtlPrimitiveType, hostIndexCount, mtlIndexType, indexBuffer, indexBufferOffset, instanceCount, baseVertex, baseInstance); } else { renderCommandEncoder->drawPrimitives(mtlPrimitiveType, baseVertex, count, instanceCount, baseInstance); } LatteGPUState.drawCallCounter++; } void MetalRenderer::draw_endSequence() { LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader(); // post-drawcall logic if (pixelShader) LatteRenderTarget_trackUpdates(); bool hasReadback = LatteTextureReadback_Update(); m_recordedDrawcalls++; // The number of draw calls needs to twice as big, since we are interrupting the render pass if (m_recordedDrawcalls >= COMMIT_TRESHOLD * 2 || hasReadback) { CommitCommandBuffer(); // TODO: where should this be called? 
LatteTextureReadback_UpdateFinishedTransfers(false); } } void* MetalRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) { auto allocation = m_memoryManager->GetBufferAllocation(size, 4); offset = allocation.bufferOffset; bufferIndex = allocation.bufferIndex; return allocation.data; } void MetalRenderer::indexData_uploadIndexMemory(uint32 offset, uint32 size) { // Do nothing, since the buffer has shared storage mode } MTL::CommandBuffer* MetalRenderer::GetCommandBuffer() { bool needsNewCommandBuffer = (m_commandBuffers.empty() || m_commandBuffers.back().m_commited); if (needsNewCommandBuffer) { // Debug //m_commandQueue->insertDebugCaptureBoundary(); MTL::CommandBuffer* mtlCommandBuffer = m_commandQueue->commandBuffer(); m_commandBuffers.push_back({mtlCommandBuffer}); return mtlCommandBuffer; } else { return m_commandBuffers.back().m_commandBuffer; } } bool MetalRenderer::CommandBufferCompleted(MTL::CommandBuffer* commandBuffer) { return commandBuffer->status() == MTL::CommandBufferStatusCompleted; } void MetalRenderer::WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer) { commandBuffer->waitUntilCompleted(); } MTL::RenderCommandEncoder* MetalRenderer::GetTemporaryRenderCommandEncoder(MTL::RenderPassDescriptor* renderPassDescriptor) { EndEncoding(); auto commandBuffer = GetCommandBuffer(); auto renderCommandEncoder = commandBuffer->renderCommandEncoder(renderPassDescriptor); m_commandEncoder = renderCommandEncoder; m_encoderType = MetalEncoderType::Render; return renderCommandEncoder; } // Some render passes clear the attachments, forceRecreate is supposed to be used in those cases MTL::RenderCommandEncoder* MetalRenderer::GetRenderCommandEncoder(bool forceRecreate, bool rebindStateIfNewEncoder) { // Check if we need to begin a new render pass if (m_commandEncoder) { if (!forceRecreate) { if (m_encoderType == MetalEncoderType::Render) { bool needsNewRenderPass = (m_state.m_lastUsedFBO == nullptr); if 
(!needsNewRenderPass) { for (uint8 i = 0; i < 8; i++) { if (m_state.m_activeFBO->colorBuffer[i].texture && m_state.m_activeFBO->colorBuffer[i].texture != m_state.m_lastUsedFBO->colorBuffer[i].texture) { needsNewRenderPass = true; break; } } } if (!needsNewRenderPass) { if (m_state.m_activeFBO->depthBuffer.texture && m_state.m_activeFBO->depthBuffer.texture != m_state.m_lastUsedFBO->depthBuffer.texture) { needsNewRenderPass = true; } } if (!needsNewRenderPass) { return (MTL::RenderCommandEncoder*)m_commandEncoder; } } } EndEncoding(); } auto commandBuffer = GetCommandBuffer(); // Update state m_state.m_lastUsedFBO = m_state.m_activeFBO; auto renderCommandEncoder = commandBuffer->renderCommandEncoder(m_state.m_activeFBO->GetRenderPassDescriptor()); m_commandEncoder = renderCommandEncoder; m_encoderType = MetalEncoderType::Render; if (rebindStateIfNewEncoder) { // Rebind all the render state RebindRenderState(renderCommandEncoder); } return renderCommandEncoder; } MTL::ComputeCommandEncoder* MetalRenderer::GetComputeCommandEncoder() { if (m_commandEncoder) { if (m_encoderType == MetalEncoderType::Compute) { return (MTL::ComputeCommandEncoder*)m_commandEncoder; } EndEncoding(); } auto commandBuffer = GetCommandBuffer(); auto computeCommandEncoder = commandBuffer->computeCommandEncoder(); m_commandEncoder = computeCommandEncoder; m_encoderType = MetalEncoderType::Compute; return computeCommandEncoder; } MTL::BlitCommandEncoder* MetalRenderer::GetBlitCommandEncoder() { if (m_commandEncoder) { if (m_encoderType == MetalEncoderType::Blit) { return (MTL::BlitCommandEncoder*)m_commandEncoder; } EndEncoding(); } auto commandBuffer = GetCommandBuffer(); auto blitCommandEncoder = commandBuffer->blitCommandEncoder(); m_commandEncoder = blitCommandEncoder; m_encoderType = MetalEncoderType::Blit; return blitCommandEncoder; } void MetalRenderer::EndEncoding() { if (m_commandEncoder) { m_commandEncoder->endEncoding(); m_commandEncoder->release(); m_commandEncoder = nullptr; 
m_encoderType = MetalEncoderType::None; // Commit the command buffer if enough draw calls have been recorded if (m_recordedDrawcalls >= COMMIT_TRESHOLD) CommitCommandBuffer(); } } void MetalRenderer::CommitCommandBuffer() { m_recordedDrawcalls = 0; if (m_commandBuffers.size() != 0) { EndEncoding(); auto& commandBuffer = m_commandBuffers.back(); if (!commandBuffer.m_commited) { commandBuffer.m_commandBuffer->commit(); commandBuffer.m_commited = true; // Debug //m_commandQueue->insertDebugCaptureBoundary(); } } } bool MetalRenderer::AcquireNextDrawable(bool mainWindow) { const bool latteBufferUsesSRGB = mainWindow ? LatteGPUState.tvBufferUsesSRGB : LatteGPUState.drcBufferUsesSRGB; if (latteBufferUsesSRGB != m_state.m_usesSRGB) { m_metalLayer->setPixelFormat(latteBufferUsesSRGB ? MTL::PixelFormatRGBA8Unorm_sRGB : MTL::PixelFormatRGBA8Unorm); m_state.m_usesSRGB = latteBufferUsesSRGB; } if (m_drawable) { // TODO: should this be true? return true; } m_drawable = m_metalLayer->nextDrawable(); if (!m_drawable) { printf("failed to acquire next drawable\n"); return false; } return true; } void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader) { sint32 textureCount = shader->resourceMapping.getTextureCount(); for (int i = 0; i < textureCount; ++i) { const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i); auto hostTextureUnit = relative_textureUnit; auto textureDim = shader->textureUnitDim[relative_textureUnit]; auto texUnitRegIndex = hostTextureUnit * 7; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: hostTextureUnit += LATTE_CEMU_VS_TEX_UNIT_BASE; texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_VS; break; case LatteConst::ShaderType::Pixel: hostTextureUnit += LATTE_CEMU_PS_TEX_UNIT_BASE; texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_PS; break; case LatteConst::ShaderType::Geometry: hostTextureUnit += LATTE_CEMU_GS_TEX_UNIT_BASE; 
texUnitRegIndex += Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_GS; break; default: UNREACHABLE; } auto textureView = m_state.m_textures[hostTextureUnit]; if (!textureView) { debug_printf("invalid bound texture view %u\n", hostTextureUnit); continue; } LatteTexture* baseTexture = textureView->baseTexture; // get texture register word 0 uint32 word4 = LatteGPUState.contextRegister[texUnitRegIndex + 4]; // TODO: wht //auto imageViewObj = textureView->GetSamplerView(word4); //info.imageView = imageViewObj->m_textureImageView; // TODO: uncomment uint32 binding = shader->resourceMapping.getTextureBaseBindingPoint() + i;//shader->resourceMapping.textureUnitToBindingPoint[hostTextureUnit]; //uint32 textureBinding = binding % MAX_MTL_TEXTURES; //uint32 samplerBinding = binding % MAX_MTL_SAMPLERS; if (binding >= MAX_MTL_TEXTURES) { debug_printf("invalid texture binding %u\n", binding); continue; } uint32 stageSamplerIndex = shader->textureUnitSamplerAssignment[relative_textureUnit]; if (stageSamplerIndex != LATTE_DECOMPILER_SAMPLER_NONE) { uint32 samplerIndex = stageSamplerIndex + LatteDecompiler_getTextureSamplerBaseIndex(shader->shaderType); const _LatteRegisterSetSampler* samplerWords = LatteGPUState.contextNew.SQ_TEX_SAMPLER + samplerIndex; // TODO: cache this instead MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); // lod uint32 iMinLOD = samplerWords->WORD1.get_MIN_LOD(); uint32 iMaxLOD = samplerWords->WORD1.get_MAX_LOD(); sint32 iLodBias = samplerWords->WORD1.get_LOD_BIAS(); // apply relative lod bias from graphic pack if (baseTexture->overwriteInfo.hasRelativeLodBias) iLodBias += baseTexture->overwriteInfo.relativeLodBias; // apply absolute lod bias from graphic pack if (baseTexture->overwriteInfo.hasLodBias) iLodBias = baseTexture->overwriteInfo.lodBias; auto filterMip = samplerWords->WORD0.get_MIP_FILTER(); if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::NONE) { 
samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest); samplerDescriptor->setLodMinClamp(0.0f); samplerDescriptor->setLodMaxClamp(0.25f); } else if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::POINT) { samplerDescriptor->setMipFilter(MTL::SamplerMipFilterNearest); samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f); samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f); } else if (filterMip == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_Z_FILTER::LINEAR) { samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear); samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f); samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f); } else { // fallback for invalid constants samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear); samplerDescriptor->setLodMinClamp((float)iMinLOD / 64.0f); samplerDescriptor->setLodMaxClamp((float)iMaxLOD / 64.0f); } auto filterMin = samplerWords->WORD0.get_XY_MIN_FILTER(); cemu_assert_debug(filterMin != Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::BICUBIC); // todo samplerDescriptor->setMinFilter((filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear); auto filterMag = samplerWords->WORD0.get_XY_MAG_FILTER(); samplerDescriptor->setMagFilter((filterMag == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::POINT || filterMin == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_XY_FILTER::ANISO_POINT) ? MTL::SamplerMinMagFilterNearest : MTL::SamplerMinMagFilterLinear); auto filterZ = samplerWords->WORD0.get_Z_FILTER(); // todo: z-filter for texture array samplers is customizable for GPU7 but OpenGL/Vulkan doesn't expose this functionality? 
auto clampX = samplerWords->WORD0.get_CLAMP_X(); auto clampY = samplerWords->WORD0.get_CLAMP_Y(); auto clampZ = samplerWords->WORD0.get_CLAMP_Z(); samplerDescriptor->setSAddressMode(GetMtlSamplerAddressMode(clampX)); samplerDescriptor->setTAddressMode(GetMtlSamplerAddressMode(clampY)); samplerDescriptor->setRAddressMode(GetMtlSamplerAddressMode(clampZ)); auto maxAniso = samplerWords->WORD0.get_MAX_ANISO_RATIO(); if (baseTexture->overwriteInfo.anisotropicLevel >= 0) maxAniso = baseTexture->overwriteInfo.anisotropicLevel; if (maxAniso > 0) { samplerDescriptor->setMaxAnisotropy(1 << maxAniso); } // TODO: set lod bias //samplerInfo.mipLodBias = (float)iLodBias / 64.0f; // depth compare uint8 depthCompareMode = shader->textureUsesDepthCompare[relative_textureUnit] ? 1 : 0; if (depthCompareMode == 1) { // TODO: is it okay to just cast? samplerDescriptor->setCompareFunction(GetMtlCompareFunc((Latte::E_COMPAREFUNC)samplerWords->WORD0.get_DEPTH_COMPARE_FUNCTION())); } // border auto borderType = samplerWords->WORD0.get_BORDER_COLOR_TYPE(); if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::TRANSPARENT_BLACK) samplerDescriptor->setBorderColor(MTL::SamplerBorderColorTransparentBlack); else if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::OPAQUE_BLACK) samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack); else if (borderType == Latte::LATTE_SQ_TEX_SAMPLER_WORD0_0::E_BORDER_COLOR_TYPE::OPAQUE_WHITE) samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueWhite); else { // Metal doesn't support custom border color samplerDescriptor->setBorderColor(MTL::SamplerBorderColorOpaqueBlack); } MTL::SamplerState* sampler = m_device->newSamplerState(samplerDescriptor); samplerDescriptor->release(); switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexSamplerState(sampler, binding); break; } case LatteConst::ShaderType::Pixel: { 
renderCommandEncoder->setFragmentSamplerState(sampler, binding); break; } default: UNREACHABLE; } sampler->release(); } MTL::Texture* mtlTexture = textureView->GetSwizzledView(word4); switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexTexture(mtlTexture, binding); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentTexture(mtlTexture, binding); break; } default: UNREACHABLE; } } // Support buffer auto GET_UNIFORM_DATA_PTR = [&](size_t index) { return supportBufferData + (index / 4); }; sint32 shaderAluConst; sint32 shaderUniformRegisterOffset; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: shaderAluConst = 0x400; shaderUniformRegisterOffset = mmSQ_VTX_UNIFORM_BLOCK_START; break; case LatteConst::ShaderType::Pixel: shaderAluConst = 0; shaderUniformRegisterOffset = mmSQ_PS_UNIFORM_BLOCK_START; break; case LatteConst::ShaderType::Geometry: shaderAluConst = 0; // geometry shader has no ALU const shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START; break; default: UNREACHABLE; } if (shader->resourceMapping.uniformVarsBufferBindingPoint >= 0) { if (shader->uniform.list_ufTexRescale.empty() == false) { for (auto& entry : shader->uniform.list_ufTexRescale) { float* xyScale = LatteTexture_getEffectiveTextureScale(shader->shaderType, entry.texUnit); memcpy(entry.currentValue, xyScale, sizeof(float) * 2); memcpy(GET_UNIFORM_DATA_PTR(entry.uniformLocation), xyScale, sizeof(float) * 2); } } if (shader->uniform.loc_alphaTestRef >= 0) { *GET_UNIFORM_DATA_PTR(shader->uniform.loc_alphaTestRef) = LatteGPUState.contextNew.SX_ALPHA_REF.get_ALPHA_TEST_REF(); } if (shader->uniform.loc_pointSize >= 0) { const auto& pointSizeReg = LatteGPUState.contextNew.PA_SU_POINT_SIZE; float pointWidth = (float)pointSizeReg.get_WIDTH() / 8.0f; if (pointWidth == 0.0f) pointWidth = 1.0f / 8.0f; // minimum size *GET_UNIFORM_DATA_PTR(shader->uniform.loc_pointSize) = pointWidth; } if 
(shader->uniform.loc_remapped >= 0) { LatteBufferCache_LoadRemappedUniforms(shader, GET_UNIFORM_DATA_PTR(shader->uniform.loc_remapped)); } if (shader->uniform.loc_uniformRegister >= 0) { uint32* uniformRegData = (uint32*)(LatteGPUState.contextRegister + mmSQ_ALU_CONSTANT0_0 + shaderAluConst); memcpy(GET_UNIFORM_DATA_PTR(shader->uniform.loc_uniformRegister), uniformRegData, shader->uniform.count_uniformRegister * 16); } if (shader->uniform.loc_windowSpaceToClipSpaceTransform >= 0) { sint32 viewportWidth; sint32 viewportHeight; LatteRenderTarget_GetCurrentVirtualViewportSize(&viewportWidth, &viewportHeight); // always call after _updateViewport() float* v = GET_UNIFORM_DATA_PTR(shader->uniform.loc_windowSpaceToClipSpaceTransform); v[0] = 2.0f / (float)viewportWidth; v[1] = 2.0f / (float)viewportHeight; } if (shader->uniform.loc_fragCoordScale >= 0) { LatteMRT::GetCurrentFragCoordScale(GET_UNIFORM_DATA_PTR(shader->uniform.loc_fragCoordScale)); } // TODO: uncomment /* if (shader->uniform.loc_verticesPerInstance >= 0) { *(int*)(supportBufferData + ((size_t)shader->uniform.loc_verticesPerInstance / 4)) = m_streamoutState.verticesPerInstance; for (sint32 b = 0; b < LATTE_NUM_STREAMOUT_BUFFER; b++) { if (shader->uniform.loc_streamoutBufferBase[b] >= 0) { *(uint32*)GET_UNIFORM_DATA_PTR(shader->uniform.loc_streamoutBufferBase[b]) = m_streamoutState.buffer[b].ringBufferOffset; } } } */ switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentBytes(supportBufferData, sizeof(supportBufferData), MTL_SUPPORT_BUFFER_BINDING); break; } default: UNREACHABLE; } } // Uniform buffers for (sint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++) { if (shader->resourceMapping.uniformBuffersBindingPoint[i] >= 0) { uint32 binding = shader->resourceMapping.uniformBuffersBindingPoint[i]; if 
(binding >= MAX_MTL_BUFFERS) { debug_printf("too big buffer index (%u), skipping binding\n", binding); continue; } size_t offset = m_state.m_uniformBufferOffsets[(uint32)shader->shaderType][i]; if (offset != INVALID_OFFSET) { switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexBuffer(m_memoryManager->GetBufferCache(), offset, binding); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentBuffer(m_memoryManager->GetBufferCache(), offset, binding); break; } default: UNREACHABLE; } } } } // Storage buffer if (shader->resourceMapping.tfStorageBindingPoint >= 0) { switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: { renderCommandEncoder->setVertexBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint); break; } case LatteConst::ShaderType::Pixel: { renderCommandEncoder->setFragmentBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint); break; } default: UNREACHABLE; } } } void MetalRenderer::RebindRenderState(MTL::RenderCommandEncoder* renderCommandEncoder) { // Viewport renderCommandEncoder->setViewport(m_state.m_viewport); // Scissor renderCommandEncoder->setScissorRect(m_state.m_scissor); // Vertex buffers for (uint8 i = 0; i < MAX_MTL_BUFFERS; i++) { auto& vertexBufferRange = m_state.m_vertexBuffers[i]; if (vertexBufferRange.offset != INVALID_OFFSET) vertexBufferRange.needsRebind = true; } } void MetalRenderer::ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a) { MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::alloc()->init(); auto colorAttachment = renderPassDescriptor->colorAttachments()->object(0); colorAttachment->setTexture(mtlTexture); colorAttachment->setClearColor(MTL::ClearColor(r, g, b, a)); colorAttachment->setLoadAction(MTL::LoadActionClear); colorAttachment->setStoreAction(MTL::StoreActionStore); 
colorAttachment->setSlice(sliceIndex); colorAttachment->setLevel(mipIndex); MTL::Texture* colorRenderTargets[8] = {nullptr}; colorRenderTargets[0] = mtlTexture; GetTemporaryRenderCommandEncoder(renderPassDescriptor); renderPassDescriptor->release(); EndEncoding(); }