#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Foundation/NSObject.hpp" #include "HW/Latte/Renderer/Metal/CachedFBOMtl.h" #include "HW/Latte/Renderer/Metal/LatteToMtl.h" #include "HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "HW/Latte/Core/FetchShader.h" #include "HW/Latte/ISA/RegDefines.h" #include "Metal/MTLDevice.hpp" #include "Metal/MTLRenderPipeline.hpp" #include "config/ActiveSettings.h" #define INVALID_TITLE_ID 0xFFFFFFFFFFFFFFFF uint64 s_cacheTitleId = INVALID_TITLE_ID; extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; template void SetFragmentState(T* desc, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { // Color attachments const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = lcr.CB_COLOR_CONTROL; uint32 blendEnableMask = colorControlReg.get_BLEND_MASK(); uint32 renderTargetMask = lcr.CB_TARGET_MASK.get_MASK(); for (uint8 i = 0; i < 8; i++) { const auto& colorBuffer = activeFBO->colorBuffer[i]; auto texture = static_cast(colorBuffer.texture); if (!texture) { continue; } auto colorAttachment = desc->colorAttachments()->object(i); colorAttachment->setPixelFormat(texture->GetRGBAView()->pixelFormat()); colorAttachment->setWriteMask(GetMtlColorWriteMask((renderTargetMask >> (i * 4)) & 0xF)); // Blending bool blendEnabled = ((blendEnableMask & (1 << i))) != 0; // Only float data type is blendable if (blendEnabled && GetMtlPixelFormatInfo(texture->format, false).dataType == MetalDataType::FLOAT) { colorAttachment->setBlendingEnabled(true); const auto& blendControlReg = lcr.CB_BLENDN_CONTROL[i]; auto rgbBlendOp = GetMtlBlendOp(blendControlReg.get_COLOR_COMB_FCN()); auto srcRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_SRCBLEND()); auto dstRgbBlendFactor = GetMtlBlendFactor(blendControlReg.get_COLOR_DSTBLEND()); colorAttachment->setRgbBlendOperation(rgbBlendOp); colorAttachment->setSourceRGBBlendFactor(srcRgbBlendFactor); colorAttachment->setDestinationRGBBlendFactor(dstRgbBlendFactor); if (blendControlReg.get_SEPARATE_ALPHA_BLEND()) { colorAttachment->setAlphaBlendOperation(GetMtlBlendOp(blendControlReg.get_ALPHA_COMB_FCN())); colorAttachment->setSourceAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_SRCBLEND())); colorAttachment->setDestinationAlphaBlendFactor(GetMtlBlendFactor(blendControlReg.get_ALPHA_DSTBLEND())); } else { colorAttachment->setAlphaBlendOperation(rgbBlendOp); colorAttachment->setSourceAlphaBlendFactor(srcRgbBlendFactor); colorAttachment->setDestinationAlphaBlendFactor(dstRgbBlendFactor); } } } // Depth stencil attachment if (activeFBO->depthBuffer.texture) { auto texture = static_cast(activeFBO->depthBuffer.texture); desc->setDepthAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); if (activeFBO->depthBuffer.hasStencil) { desc->setStencilAttachmentPixelFormat(texture->GetRGBAView()->pixelFormat()); } } } void MetalPipelineCache::ShaderCacheLoading_begin(uint64 cacheTitleId) { s_cacheTitleId = cacheTitleId; } void MetalPipelineCache::ShaderCacheLoading_end() { } void MetalPipelineCache::ShaderCacheLoading_Close() { g_compiled_shaders_total = 0; g_compiled_shaders_async = 0; } MetalPipelineCache::~MetalPipelineCache() { for (auto& pair : m_pipelineCache) { pair.second->release(); } m_pipelineCache.clear(); NS::Error* error = nullptr; m_binaryArchive->serializeToURL(m_binaryArchiveURL, &error); if (error) { debug_printf("failed to serialize binary archive: %s\n", error->localizedDescription()->utf8String()); error->release(); } m_binaryArchive->release(); m_binaryArchiveURL->release(); } MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr); auto& pipeline = m_pipelineCache[stateHash]; if (pipeline) return pipeline; // Vertex descriptor MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); for (auto& bufferGroup : fetchShader->bufferGroups) { std::optional fetchType; for (sint32 j = 0; j < bufferGroup.attribCount; ++j) { auto& attr = bufferGroup.attrib[j]; uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; if (semanticId == (uint32)-1) continue; // attribute not used? auto attribute = vertexDescriptor->attributes()->object(semanticId); attribute->setOffset(attr.offset); attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); attribute->setFormat(GetMtlVertexFormat(attr.format)); if (fetchType.has_value()) cemu_assert_debug(fetchType == attr.fetchType); else fetchType = attr.fetchType; if (attr.fetchType == LatteConst::INSTANCE_DATA) { cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported } } uint32 bufferIndex = bufferGroup.attributeBufferIndex; uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; bufferStride = Align(bufferStride, 4); // HACK if (bufferStride == 0) { debug_printf("vertex buffer %u has a vertex stride of 0 bytes, using 4 bytes instead\n", bufferIndex); bufferStride = 4; } auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); layout->setStride(bufferStride); if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) layout->setStepFunction(MTL::VertexStepFunctionPerVertex); else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) layout->setStepFunction(MTL::VertexStepFunctionPerInstance); else { debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); cemu_assert(false); } } auto mtlVertexShader = static_cast(vertexShader->shader); auto mtlPixelShader = static_cast(pixelShader->shader); mtlVertexShader->CompileVertexFunction(); mtlPixelShader->CompileFragmentFunction(activeFBO); // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); desc->setVertexFunction(mtlVertexShader->GetFunction()); desc->setFragmentFunction(mtlPixelShader->GetFunction()); // TODO: don't always set the vertex descriptor? desc->setVertexDescriptor(vertexDescriptor); SetFragmentState(desc, activeFBO, lcr); TryLoadBinaryArchive(); // Load binary if (m_binaryArchive) { NS::Object* binArchives[] = {m_binaryArchive}; auto binaryArchives = NS::Array::alloc()->init(binArchives, 1); desc->setBinaryArchives(binaryArchives); binaryArchives->release(); } NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Cached render pipeline state", desc)); #endif pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionFailOnBinaryArchiveMiss, nullptr, &error); //static uint32 oldPipelineCount = 0; //static uint32 newPipelineCount = 0; // Pipeline wasn't found in the binary archive, we need to compile it if (error) { desc->setBinaryArchives(nullptr); error->release(); error = nullptr; #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("New render pipeline state", desc)); #endif pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, &error); if (error) { debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); error->release(); return nullptr; } else { // Save binary if (m_binaryArchive) { NS::Error* error = nullptr; m_binaryArchive->addRenderPipelineFunctions(desc, &error); if (error) { debug_printf("error saving render pipeline functions: %s\n", error->localizedDescription()->utf8String()); error->release(); } } } //newPipelineCount++; } //else //{ // oldPipelineCount++; //} //debug_printf("%u pipelines were found in the binary archive, %u new were created\n", oldPipelineCount, newPipelineCount); desc->release(); vertexDescriptor->release(); return pipeline; } MTL::RenderPipelineState* MetalPipelineCache::GetMeshPipelineState(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* geometryShader, const LatteDecompilerShader* pixelShader, CachedFBOMtl* activeFBO, const LatteContextRegister& lcr, Renderer::INDEX_TYPE hostIndexType) { uint64 stateHash = CalculateRenderPipelineHash(fetchShader, vertexShader, pixelShader, activeFBO, lcr); stateHash += lcr.GetRawView()[mmVGT_PRIMITIVE_TYPE]; stateHash = std::rotl(stateHash, 7); stateHash += (uint8)hostIndexType; stateHash = std::rotl(stateHash, 7); // TODO: 7?s auto& pipeline = m_pipelineCache[stateHash]; if (pipeline) return pipeline; auto mtlObjectShader = static_cast(vertexShader->shader); auto mtlMeshShader = static_cast(geometryShader->shader); auto mtlPixelShader = static_cast(pixelShader->shader); mtlObjectShader->CompileObjectFunction(lcr, fetchShader, vertexShader, hostIndexType); mtlPixelShader->CompileFragmentFunction(activeFBO); // Render pipeline state MTL::MeshRenderPipelineDescriptor* desc = MTL::MeshRenderPipelineDescriptor::alloc()->init(); desc->setObjectFunction(mtlObjectShader->GetFunction()); desc->setMeshFunction(mtlMeshShader->GetFunction()); desc->setFragmentFunction(mtlPixelShader->GetFunction()); SetFragmentState(desc, activeFBO, lcr); TryLoadBinaryArchive(); // Load binary // TODO: no binary archives? :( NS::Error* error = nullptr; #ifdef CEMU_DEBUG_ASSERT desc->setLabel(GetLabel("Mesh pipeline state", desc)); #endif pipeline = m_mtlr->GetDevice()->newRenderPipelineState(desc, MTL::PipelineOptionNone, nullptr, &error); if (error) { debug_printf("error creating render pipeline state: %s\n", error->localizedDescription()->utf8String()); error->release(); return nullptr; } desc->release(); return pipeline; } uint64 MetalPipelineCache::CalculateRenderPipelineHash(const LatteFetchShader* fetchShader, const LatteDecompilerShader* vertexShader, const LatteDecompilerShader* pixelShader, class CachedFBOMtl* activeFBO, const LatteContextRegister& lcr) { // Hash uint64 stateHash = 0; for (int i = 0; i < Latte::GPU_LIMITS::NUM_COLOR_ATTACHMENTS; ++i) { auto textureView = static_cast(activeFBO->colorBuffer[i].texture); if (!textureView) continue; stateHash += textureView->GetRGBAView()->pixelFormat() + i * 31; stateHash = std::rotl(stateHash, 7); } if (activeFBO->depthBuffer.texture) { auto textureView = static_cast(activeFBO->depthBuffer.texture); stateHash += textureView->GetRGBAView()->pixelFormat(); stateHash = std::rotl(stateHash, 7); } for (auto& group : fetchShader->bufferGroups) { uint32 bufferStride = group.getCurrentBufferStride(lcr.GetRawView()); stateHash = std::rotl(stateHash, 7); stateHash += bufferStride * 3; } stateHash += fetchShader->getVkPipelineHashFragment(); stateHash = std::rotl(stateHash, 7); stateHash += lcr.GetRawView()[mmVGT_STRMOUT_EN]; stateHash = std::rotl(stateHash, 7); if(lcr.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL()) stateHash += 0x333333; stateHash = (stateHash >> 8) + (stateHash * 0x370531ull) % 0x7F980D3BF9B4639Dull; uint32* ctxRegister = lcr.GetRawView(); if (vertexShader) stateHash += vertexShader->baseHash; stateHash = std::rotl(stateHash, 13); if (pixelShader) stateHash += pixelShader->baseHash + pixelShader->auxHash; stateHash = std::rotl(stateHash, 13); uint32 polygonCtrl = lcr.PA_SU_SC_MODE_CNTL.getRawValue(); stateHash += polygonCtrl; stateHash = std::rotl(stateHash, 7); stateHash += ctxRegister[Latte::REGADDR::PA_CL_CLIP_CNTL]; stateHash = std::rotl(stateHash, 7); const auto colorControlReg = ctxRegister[Latte::REGADDR::CB_COLOR_CONTROL]; stateHash += colorControlReg; stateHash += ctxRegister[Latte::REGADDR::CB_TARGET_MASK]; const uint32 blendEnableMask = (colorControlReg >> 8) & 0xFF; if (blendEnableMask) { for (auto i = 0; i < 8; ++i) { if (((blendEnableMask & (1 << i))) == 0) continue; stateHash = std::rotl(stateHash, 7); stateHash += ctxRegister[Latte::REGADDR::CB_BLEND0_CONTROL + i]; } } return stateHash; } void MetalPipelineCache::TryLoadBinaryArchive() { if (m_binaryArchive || s_cacheTitleId == INVALID_TITLE_ID) return; const std::string cacheFilename = fmt::format("{:016x}_mtl_pipelines.bin", s_cacheTitleId); const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}", cacheFilename); m_binaryArchiveURL = NS::URL::fileURLWithPath(ToNSString((const char*)cachePath.generic_u8string().c_str())); MTL::BinaryArchiveDescriptor* desc = MTL::BinaryArchiveDescriptor::alloc()->init(); desc->setUrl(m_binaryArchiveURL); NS::Error* error = nullptr; m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); if (error) { desc->setUrl(nullptr); error->release(); error = nullptr; m_binaryArchive = m_mtlr->GetDevice()->newBinaryArchive(desc, &error); if (error) { debug_printf("failed to create binary archive: %s\n", error->localizedDescription()->utf8String()); error->release(); } } desc->release(); }