#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "Cafe/OS/libs/gx2/GX2.h"
#include "config/ActiveSettings.h"
#include "util/helpers/Serializer.h"
#include "Cafe/HW/Latte/Common/RegisterSerializer.h"

std::mutex s_nvidiaWorkaround;

/* rects emulation */

// Appends GLSL to gsSrc that re-emits input vertex vIdx unchanged.
// Only parameters that the vertex shader exports (outputParameterMask) and that the
// pixel shader imports are forwarded, followed by gl_Position and EmitVertex().
void rectsEmulationGS_outputSingleVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 vIdx, const LatteContextRegister& latteRegister)
{
	auto parameterMask = vertexShader->outputParameterMask;
	for (uint32 i = 0; i < 32; i++)
	{
		// unsigned literal: shifting a signed 1 by 31 would hit the sign bit
		if ((parameterMask & (1u << i)) == 0)
			continue;
		sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
		if (vsSemanticId < 0)
			continue;
		// make sure PS has matching input
		if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
			continue;
		gsSrc.append(fmt::format("passParameterSem{}Out = passParameterSem{}In[{}];\r\n", vsSemanticId, vsSemanticId, vIdx));
	}
	gsSrc.append(fmt::format("gl_Position = gl_in[{}].gl_Position;\r\n", vIdx));
	gsSrc.append("EmitVertex();\r\n");
}

// Appends GLSL to gsSrc that emits the synthesized fourth rectangle vertex.
// Every forwarded parameter and gl_Position is computed by the GLSL helper
// gen4thVertex<variant> from the three input vertices.
void rectsEmulationGS_outputGeneratedVertex(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, const char* variant, const LatteContextRegister& latteRegister)
{
	auto parameterMask = vertexShader->outputParameterMask;
	for (uint32 i = 0; i < 32; i++)
	{
		// unsigned literal: shifting a signed 1 by 31 would hit the sign bit
		if ((parameterMask & (1u << i)) == 0)
			continue;
		sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
		if (vsSemanticId < 0)
			continue;
		// make sure PS has matching input
		if (!psInputTable->hasPSImportForSemanticId(vsSemanticId))
			continue;
		gsSrc.append(fmt::format("passParameterSem{}Out = gen4thVertex{}(passParameterSem{}In[0], passParameterSem{}In[1], passParameterSem{}In[2]);\r\n", vsSemanticId, variant, vsSemanticId, vsSemanticId, vsSemanticId));
	}
	gsSrc.append(fmt::format("gl_Position = gen4thVertex{}(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_in[2].gl_Position);\r\n", variant));
	gsSrc.append("EmitVertex();\r\n");
}

// Emits the four strip vertices in the order p0..p3.
// Index 3 denotes the generated vertex, indices 0-2 refer to the input vertices.
void rectsEmulationGS_outputVerticesCode(std::string& gsSrc, LatteDecompilerShader* vertexShader, LatteShaderPSInputTable* psInputTable, sint32 p0, sint32 p1, sint32 p2, sint32 p3, const char* variant, const LatteContextRegister& latteRegister)
{
	const sint32 emitOrder[4] = { p0, p1, p2, p3 };
	for (const sint32 vertexIndex : emitOrder)
	{
		if (vertexIndex == 3)
			rectsEmulationGS_outputGeneratedVertex(gsSrc, vertexShader, psInputTable, variant, latteRegister);
		else
			rectsEmulationGS_outputSingleVertex(gsSrc, vertexShader, psInputTable, vertexIndex, latteRegister);
	}
}

// Builds the GLSL source of a geometry shader that expands each input triangle into a
// 4-vertex triangle strip (rectangle), wraps it in a RendererShaderVk and requests its
// compilation via PreponeCompilation(true).
// The returned object is allocated with new; ownership passes to the caller.
RendererShaderVk* rectsEmulationGS_generate(LatteDecompilerShader* vertexShader, const LatteContextRegister& latteRegister)
{
	std::string gsSrc;
	gsSrc.append("#version 450\r\n");

	LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();

	// layout
	gsSrc.append("layout(triangles) in;\r\n");
	gsSrc.append("layout(triangle_strip) out;\r\n");
	gsSrc.append("layout(max_vertices = 4) out;\r\n");

	// inputs & outputs (pass 0 declares the inputs, pass 1 the outputs)
	auto parameterMask = vertexShader->outputParameterMask;
	for (sint32 f = 0; f < 2; f++)
	{
		for (uint32 i = 0; i < 32; i++)
		{
			// unsigned literal: shifting a signed 1 by 31 would hit the sign bit
			if ((parameterMask & (1u << i)) == 0)
				continue;
			sint32 vsSemanticId = psInputTable->getVertexShaderOutParamSemanticId(latteRegister.GetRawView(), i);
			if (vsSemanticId < 0)
				continue;
			auto psImport = psInputTable->getPSImportBySemanticId(vsSemanticId);
			if (psImport == nullptr)
				continue;

			gsSrc.append(fmt::format("layout(location = {}) ", psInputTable->getPSImportLocationBySemanticId(vsSemanticId)));
			if (psImport->isFlat)
				gsSrc.append("flat ");
			if (psImport->isNoPerspective)
				gsSrc.append("noperspective ");

			if (f == 0)
			{
				gsSrc.append("in");
				gsSrc.append(fmt::format(" vec4 passParameterSem{}In[];\r\n", vsSemanticId));
			}
			else
			{
				gsSrc.append("out");
				gsSrc.append(fmt::format(" vec4 passParameterSem{}Out;\r\n", vsSemanticId));
			}
		}
	}

	// gen function
	gsSrc.append("vec4 gen4thVertexA(vec4 a, vec4 b, vec4 c)\r\n");
	gsSrc.append("{\r\n");
	gsSrc.append("return b - (c - a);\r\n");
	gsSrc.append("}\r\n");

	gsSrc.append("vec4 gen4thVertexB(vec4 a, vec4 b, vec4 c)\r\n");
	gsSrc.append("{\r\n");
	gsSrc.append("return c - (b - a);\r\n");
	gsSrc.append("}\r\n");

	gsSrc.append("vec4 gen4thVertexC(vec4 a, vec4 b, vec4 c)\r\n");
	gsSrc.append("{\r\n");
	gsSrc.append("return c + (b - a);\r\n");
	gsSrc.append("}\r\n");

	// main
	gsSrc.append("void main()\r\n");
	gsSrc.append("{\r\n");

	// there are two possible winding orders that need different triangle generation:
	// 0 1
	// 2 3
	// and
	// 0 1
	// 3 2
	// all others are just symmetries of these cases

	// we can determine the case by comparing the distance 0<->1 and 0<->2

	gsSrc.append("float dist0_1 = length(gl_in[1].gl_Position.xy - gl_in[0].gl_Position.xy);\r\n");
	gsSrc.append("float dist0_2 = length(gl_in[2].gl_Position.xy - gl_in[0].gl_Position.xy);\r\n");
	gsSrc.append("float dist1_2 = length(gl_in[2].gl_Position.xy - gl_in[1].gl_Position.xy);\r\n");

	// emit vertices
	gsSrc.append("if(dist0_1 > dist0_2 && dist0_1 > dist1_2)\r\n");
	gsSrc.append("{\r\n");
	// p0 to p1 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 2, 1, 0, 3, "A", latteRegister);
	gsSrc.append("} else if ( dist0_2 > dist0_1 && dist0_2 > dist1_2 ) {\r\n");
	// p0 to p2 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 1, 2, 0, 3, "B", latteRegister);
	gsSrc.append("} else {\r\n");
	// p1 to p2 is diagonal
	rectsEmulationGS_outputVerticesCode(gsSrc, vertexShader, psInputTable, 0, 1, 2, 3, "C", latteRegister);
	gsSrc.append("}\r\n");

	gsSrc.append("}\r\n");

	auto vkShader = new RendererShaderVk(RendererShader::ShaderType::kGeometry, 0, 0, false, false, gsSrc);
	vkShader->PreponeCompilation(true);
	return vkShader;
}

/* pipeline compiler and cache helper */

extern std::atomic_int g_compiling_pipelines;
extern std::atomic_int g_compiling_pipelines_async;
extern std::atomic_uint64_t g_compiling_pipelines_syncTimeSum;

PipelineCompiler::PipelineCompiler() {}

// Drops the references on the pipeline object and render pass that were taken in
// InitFromCurrentGPUState (both pointers stay null if that was never called).
PipelineCompiler::~PipelineCompiler()
{
	if (m_vkrObjPipeline)
		m_vkrObjPipeline->decRef();
	if (m_renderPassObj)
		m_renderPassObj->decRef();
}

// Maps a Latte vertex attribute format to the VkFormat used for the vertex input.
// Every format, including the float ones, is mapped to a UINT format of the same layout.
// NOTE(review): presumably the generated vertex shader decodes the raw bits itself - confirm.
// Unknown formats are logged and yield VK_FORMAT_UNDEFINED.
VkFormat PipelineCompiler::GetVertexFormat(uint8 format)
{
	switch (format)
	{
	case FMT_32_32_32_32_FLOAT:
		return VK_FORMAT_R32G32B32A32_UINT;
	case FMT_32_32_32_FLOAT:
		return VK_FORMAT_R32G32B32_UINT;
	case FMT_32_32_FLOAT:
		return VK_FORMAT_R32G32_UINT;
	case FMT_32_FLOAT:
		return VK_FORMAT_R32_UINT;
	case FMT_8_8_8_8:
		return VK_FORMAT_R8G8B8A8_UINT;
	case FMT_8_8_8:
		return VK_FORMAT_R8G8B8_UINT;
	case FMT_8_8:
		return VK_FORMAT_R8G8_UINT;
	case FMT_8:
		return VK_FORMAT_R8_UINT;
	case FMT_32_32_32_32:
		return VK_FORMAT_R32G32B32A32_UINT;
	case FMT_32_32_32:
		return VK_FORMAT_R32G32B32_UINT;
	case FMT_32_32:
		return VK_FORMAT_R32G32_UINT;
	case FMT_32:
		return VK_FORMAT_R32_UINT;
	case FMT_16_16_16_16:
		return VK_FORMAT_R16G16B16A16_UINT; // verified to match OpenGL
	case FMT_16_16_16:
		return VK_FORMAT_R16G16B16_UINT;
	case FMT_16_16:
		return VK_FORMAT_R16G16_UINT;
	case FMT_16:
		return VK_FORMAT_R16_UINT;
	case FMT_16_16_16_16_FLOAT:
		return VK_FORMAT_R16G16B16A16_UINT; // verified to match OpenGL
	case FMT_16_16_16_FLOAT:
		return VK_FORMAT_R16G16B16_UINT;
	case FMT_16_16_FLOAT:
		return VK_FORMAT_R16G16_UINT;
	case FMT_16_FLOAT:
		return VK_FORMAT_R16_UINT;
	case FMT_2_10_10_10:
		return VK_FORMAT_R32_UINT; // verified to match OpenGL
	default:
		forceLog_printf("Unsupported vertex format: %02x", format);
		assert_dbg();
		return VK_FORMAT_UNDEFINED;
	}
}

// Translates a Latte blend combine function to the Vulkan blend op.
// Unknown values trigger cemu_assert_suspicious and fall back to VK_BLEND_OP_ADD.
static VkBlendOp GetVkBlendOp(Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC combineFunc)
{
	switch (combineFunc)
	{
	case Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC::DST_PLUS_SRC:
		return VK_BLEND_OP_ADD;
	case Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC::SRC_MINUS_DST:
		return VK_BLEND_OP_SUBTRACT;
	case Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC::MIN_DST_SRC:
		return VK_BLEND_OP_MIN;
	case Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC::MAX_DST_SRC:
		return VK_BLEND_OP_MAX;
	case Latte::LATTE_CB_BLENDN_CONTROL::E_COMBINEFUNC::DST_MINUS_SRC:
		return VK_BLEND_OP_REVERSE_SUBTRACT;
	default:
		cemu_assert_suspicious();
		return VK_BLEND_OP_ADD;
	}
}

// Translates a Latte blend factor to the Vulkan blend factor through a lookup table.
// Entries without a mapping yet (0x0B, 0x0C) and values beyond the table yield
// VK_BLEND_FACTOR_MAX_ENUM.
static VkBlendFactor GetVkBlendFactor(Latte::LATTE_CB_BLENDN_CONTROL::E_BLENDFACTOR factor)
{
	static constexpr VkBlendFactor factors[] =
	{
		/* 0x00 */ VK_BLEND_FACTOR_ZERO,
		/* 0x01 */ VK_BLEND_FACTOR_ONE,
		/* 0x02 */ VK_BLEND_FACTOR_SRC_COLOR,
		/* 0x03 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
		/* 0x04 */ VK_BLEND_FACTOR_SRC_ALPHA,
		/* 0x05 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
		/* 0x06 */ VK_BLEND_FACTOR_DST_ALPHA,
		/* 0x07 */ VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
		/* 0x08 */ VK_BLEND_FACTOR_DST_COLOR,
		/* 0x09 */ VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
		/* 0x0A */ VK_BLEND_FACTOR_SRC_ALPHA_SATURATE,
		/* 0x0B */ VK_BLEND_FACTOR_MAX_ENUM, // todo
		/* 0x0C */ VK_BLEND_FACTOR_MAX_ENUM, // todo
		/* 0x0D */ VK_BLEND_FACTOR_CONSTANT_COLOR,
		/* 0x0E */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
		/* 0x0F */ VK_BLEND_FACTOR_SRC1_COLOR,
		/* 0x10 */ VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR,
		/* 0x11 */ VK_BLEND_FACTOR_SRC1_ALPHA,
		/* 0x12 */ VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA,
		/* 0x13 */ VK_BLEND_FACTOR_CONSTANT_ALPHA,
		/* 0x14 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA
	};
	const uint32 factorIndex = (uint32)factor;
	cemu_assert_debug(factorIndex < std::size(factors));
	// the assert above is debug-only; guard the table access in release builds as well
	if (factorIndex >= std::size(factors))
		return VK_BLEND_FACTOR_MAX_ENUM;
	return factors[factorIndex];
}

// Returns true if the given blend factor reads the blend constants.
bool PipelineCompiler::ConsumesBlendConstants(VkBlendFactor blendFactor)
{
	return blendFactor == VK_BLEND_FACTOR_CONSTANT_COLOR ||
		blendFactor == VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR ||
		blendFactor == VK_BLEND_FACTOR_CONSTANT_ALPHA ||
		blendFactor == VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA;
}

// Creates the descriptor set layout for one shader stage from the shader's resource mapping
// (textures, uniform variable buffer, uniform buffers, transform feedback storage buffer)
// and records the dynamic-offset bookkeeping for that stage in vkrPipelineInfo.
// Failure to create the layout is reported through VulkanRenderer::UnrecoverableError.
void PipelineCompiler::CreateDescriptorSetLayout(VulkanRenderer* vkRenderer, LatteDecompilerShader* shader, VkDescriptorSetLayout& layout, PipelineInfo* vkrPipelineInfo)
{
	std::vector<VkDescriptorSetLayoutBinding> descriptorSetLayoutBindings;

	// select stage flags and stage index from the shader type
	VkShaderStageFlags stageFlags = 0;
	uint32 stageIndex = 0;
	if (shader->shaderType == LatteConst::ShaderType::Vertex)
	{
		stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
		stageIndex = VulkanRendererConst::SHADER_STAGE_INDEX_VERTEX;
	}
	else if (shader->shaderType == LatteConst::ShaderType::Pixel)
	{
		stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
		stageIndex = VulkanRendererConst::SHADER_STAGE_INDEX_FRAGMENT;
	}
	else if (shader->shaderType == LatteConst::ShaderType::Geometry)
	{
		stageFlags = VK_SHADER_STAGE_GEOMETRY_BIT;
		stageIndex = VulkanRendererConst::SHADER_STAGE_INDEX_GEOMETRY;
	}

	// attributes
	// -> not part of descriptor

	// textures
	sint32 textureBindingBase = shader->resourceMapping.getTextureBaseBindingPoint();
	if (textureBindingBase >= 0)
	{
		sint32 textureCount = shader->resourceMapping.getTextureCount();
		for (sint32 i = 0; i < textureCount; i++)
		{
			VkDescriptorSetLayoutBinding entry{};
			entry.binding = (uint32)textureBindingBase + i;
			entry.descriptorCount = 1;
			entry.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
			entry.pImmutableSamplers = nullptr;
			entry.stageFlags = stageFlags;
			descriptorSetLayoutBindings.emplace_back(entry);
		}
	}

	// uniform buffers
	if (shader->resourceMapping.uniformVarsBufferBindingPoint >= 0)
	{
		VkDescriptorSetLayoutBinding entry{};
		entry.binding = shader->resourceMapping.uniformVarsBufferBindingPoint;
		entry.descriptorCount = 1;
		entry.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
		entry.pImmutableSamplers = nullptr;
		entry.stageFlags = stageFlags;
		descriptorSetLayoutBindings.emplace_back(entry);
	}

	for (sint32 i = 0; i < LATTE_NUM_MAX_UNIFORM_BUFFERS; i++)
	{
		if (shader->resourceMapping.uniformBuffersBindingPoint[i] >= 0)
		{
			VkDescriptorSetLayoutBinding entry{};
			entry.binding = shader->resourceMapping.uniformBuffersBindingPoint[i];
			entry.descriptorCount = 1;
			entry.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
			entry.pImmutableSamplers = nullptr;
			entry.stageFlags = stageFlags;
			descriptorSetLayoutBindings.emplace_back(entry);
			vkrPipelineInfo->dynamicOffsetInfo.list_uniformBuffers[stageIndex].emplace_back((uint8)i);
		}
	}

	// storage buffer for TF
	if (shader->resourceMapping.tfStorageBindingPoint >= 0)
	{
		VkDescriptorSetLayoutBinding entry{};
		entry.binding = shader->resourceMapping.tfStorageBindingPoint;
		entry.descriptorCount = 1;
		entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
		entry.pImmutableSamplers = nullptr;
		entry.stageFlags = stageFlags;
		descriptorSetLayoutBindings.emplace_back(entry);
	}

	if (shader->resourceMapping.uniformVarsBufferBindingPoint >= 0)
		vkrPipelineInfo->dynamicOffsetInfo.hasUniformVar[stageIndex] = true;
	if (shader->resourceMapping.hasUniformBuffers())
		vkrPipelineInfo->dynamicOffsetInfo.hasUniformBuffers[stageIndex] = true;

	VkDescriptorSetLayoutCreateInfo layoutInfo = {};
	layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
	layoutInfo.bindingCount = (uint32)descriptorSetLayoutBindings.size();
	layoutInfo.pBindings = descriptorSetLayoutBindings.data();

	if (vkCreateDescriptorSetLayout(vkRenderer->m_logicalDevice, &layoutInfo, nullptr, &layout) != VK_SUCCESS)
		vkRenderer->UnrecoverableError(fmt::format("Failed to create descriptor set layout for shader {0:#x}", shader->baseHash).c_str());
}

// Fills shaderStages with one entry per present stage (vertex, geometry or rect-emulation
// geometry, fragment). Returns false without adding stages if any required shader has no
// valid shader module.
bool PipelineCompiler::InitShaderStages(VulkanRenderer* vkRenderer, RendererShaderVk* vkVertexShader, RendererShaderVk* vkPixelShader, RendererShaderVk* vkGeometryShader)
{
	// prepare shader stages
	cemu_assert_debug(vkVertexShader == nullptr || vkVertexShader->IsCompiled());
	cemu_assert_debug(vkPixelShader == nullptr || vkPixelShader->IsCompiled());
	cemu_assert_debug(vkGeometryShader == nullptr || vkGeometryShader->IsCompiled());

	// the rect emulation GS is only used when the game supplies no geometry shader
	RendererShaderVk* rectEmulationGS = (vkGeometryShader == nullptr) ? m_rectEmulationGS : nullptr;

	const bool hasInvalidShader =
		(vkVertexShader && vkVertexShader->GetShaderModule() == VK_NULL_HANDLE) ||
		(vkGeometryShader && vkGeometryShader->GetShaderModule() == VK_NULL_HANDLE) ||
		(rectEmulationGS && rectEmulationGS->GetShaderModule() == VK_NULL_HANDLE) ||
		(vkPixelShader && vkPixelShader->GetShaderModule() == VK_NULL_HANDLE);
	if (hasInvalidShader)
	{
		forceLog_printf("Vulkan-Info: Pipeline creation failed due to invalid shader(s)");
		return false;
	}

	if (vkVertexShader)
		shaderStages.emplace_back(vkRenderer->CreatePipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vkVertexShader->GetShaderModule(), "main"));

	if (vkGeometryShader)
		shaderStages.emplace_back(vkRenderer->CreatePipelineShaderStageCreateInfo(VK_SHADER_STAGE_GEOMETRY_BIT, vkGeometryShader->GetShaderModule(), "main"));
	else if (rectEmulationGS)
		shaderStages.emplace_back(vkRenderer->CreatePipelineShaderStageCreateInfo(VK_SHADER_STAGE_GEOMETRY_BIT, rectEmulationGS->GetShaderModule(), "main"));

	if (vkPixelShader)
		shaderStages.emplace_back(vkRenderer->CreatePipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, vkPixelShader->GetShaderModule(), "main"));
	return true;
}

// Builds the vertex attribute and binding descriptions from the fetch shader's buffer
// groups and fills vertexInputInfo. Attributes that the vertex shader does not map
// (mapping == -1) are skipped. One binding is created per buffer group; its stride is
// read from the vertex attribute block registers.
void PipelineCompiler::InitVertexInputState(const LatteContextRegister& latteRegister, LatteDecompilerShader* vertexShader, LatteFetchShader* fetchShader)
{
	vertexInputAttributeDescription.reserve(16);
	vertexInputBindingDescription.reserve(fetchShader->bufferGroups.size());

	for (auto& bufferGroup : fetchShader->bufferGroups)
	{
		// fetch type shared by all used attributes of this buffer group
		std::optional<LatteConst::VertexFetchType2> fetchType;

		for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
		{
			auto& attr = bufferGroup.attrib[j];

			uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId];
			if (semanticId == (uint32)-1)
				continue; // attribute not used?

			VkVertexInputAttributeDescription entry{};
			entry.location = semanticId;
			entry.offset = attr.offset;
			entry.binding = attr.attributeBufferIndex;
			entry.format = GetVertexFormat(attr.format);
			vertexInputAttributeDescription.emplace_back(entry);

			if (fetchType.has_value())
				cemu_assert_debug(fetchType == attr.fetchType);
			else
				fetchType = attr.fetchType;

			if (attr.fetchType == LatteConst::INSTANCE_DATA)
			{
				cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
				// use VK_EXT_vertex_attribute_divisor
			}
		}

		uint32 bufferIndex = bufferGroup.attributeBufferIndex;
		uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
		uint32 bufferStride = (latteRegister.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;

		VkVertexInputBindingDescription entry{};
		entry.stride = bufferStride;
		if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
			entry.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
		else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
			entry.inputRate = VK_VERTEX_INPUT_RATE_INSTANCE;
		else
		{
			cemu_assert(false);
		}
		entry.binding = bufferIndex;
		vertexInputBindingDescription.emplace_back(entry);
	}

	vertexInputInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
	vertexInputInfo.vertexBindingDescriptionCount = vertexInputBindingDescription.size();
	vertexInputInfo.pVertexBindingDescriptions = vertexInputBindingDescription.data();
	vertexInputInfo.vertexAttributeDescriptionCount = vertexInputAttributeDescription.size();
	vertexInputInfo.pVertexAttributeDescriptions = vertexInputAttributeDescription.data();
}

// Selects the Vulkan topology for a Latte primitive mode.
// Primitive restart is enabled by default and switched off for the list topologies
// (including the emulated quad/rect modes). Unsupported modes are logged and leave the
// topology untouched.
void PipelineCompiler::InitInputAssemblyState(const Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE primitiveMode)
{
	inputAssembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
	inputAssembly.primitiveRestartEnable = VK_TRUE;
	switch (primitiveMode)
	{
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
		inputAssembly.primitiveRestartEnable = false;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::LINES:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
		inputAssembly.primitiveRestartEnable = false;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::LINE_STRIP:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::LINE_LOOP:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // line loops are emulated as line strips with an extra connecting strip at the end
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::LINE_STRIP_ADJACENT: // Tropical Freeze level 3-6
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLES:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
		inputAssembly.primitiveRestartEnable = false;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_FAN:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::TRIANGLE_STRIP:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::QUADS:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; // quads are emulated as 2 triangles
		inputAssembly.primitiveRestartEnable = false;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::QUAD_STRIP:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; // quad strips are emulated as (count-2)/2 triangles
		inputAssembly.primitiveRestartEnable = false;
		break;
	case Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS:
		inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; // rects are emulated as 2 triangles
		inputAssembly.primitiveRestartEnable = false;
		break;
	default:
		forceLogDebug_printf("Vulkan-Unsupported: Graphics pipeline with primitive mode %d created", primitiveMode);
		cemu_assert_debug(false);
	}
}

// Declares a single viewport and a single scissor rectangle.
void PipelineCompiler::InitViewportState()
{
	viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
	viewportState.viewportCount = 1;
	viewportState.scissorCount = 1;
}

// Fills the rasterizer, depth-clip extension and multisample state.
// usesDepthBias is set to whether front polygon offset is enabled.
// NOTE(review): the clip control values are read from the global LatteGPUState.contextNew
// while the polygon control comes from the latteRegister parameter - confirm both always
// refer to the same register state.
void PipelineCompiler::InitRasterizerState(const LatteContextRegister& latteRegister, VulkanRenderer* vkRenderer, bool isPrimitiveRect, bool& usesDepthBias)
{
	// polygon control
	const auto& polygonControlReg = latteRegister.PA_SU_SC_MODE_CNTL;
	const auto frontFace = polygonControlReg.get_FRONT_FACE();
	uint32 cullFront = polygonControlReg.get_CULL_FRONT();
	uint32 cullBack = polygonControlReg.get_CULL_BACK();
	uint32 polyOffsetFrontEnable = polygonControlReg.get_OFFSET_FRONT_ENABLED();

	cemu_assert_debug(LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_NEAR_DISABLE() == LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE()); // near or far clipping can be disabled individually
	bool zClipEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_ZCLIP_FAR_DISABLE() == false;

	// z-clipping
	rasterizerExt.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT;
	rasterizerExt.depthClipEnable = zClipEnable;
	rasterizerExt.flags = 0;

	rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
	rasterizer.pNext = &rasterizerExt;
	rasterizer.rasterizerDiscardEnable = LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
	// GX2SetSpecialState(0, true) workaround
	if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
		rasterizer.rasterizerDiscardEnable = false;

	rasterizer.polygonMode = VK_POLYGON_MODE_FILL;
	if (vkRenderer->m_featureControl.deviceExtensions.nv_fill_rectangle && isPrimitiveRect)
		rasterizer.polygonMode = VK_POLYGON_MODE_FILL_RECTANGLE_NV;
	rasterizer.depthClampEnable = VK_TRUE; // depth clamping is always enabled
	rasterizer.lineWidth = 1.0f; // TODO -> mmPA_SU_LINE_CNTL

	usesDepthBias = polyOffsetFrontEnable;
	if (polyOffsetFrontEnable)
	{
		rasterizer.depthBiasEnable = VK_TRUE;
		// initialize to zero, set dynamically via vkCmdSetDepthBias
		rasterizer.depthBiasConstantFactor = 0.0f;
		rasterizer.depthBiasSlopeFactor = 0.0f;
		rasterizer.depthBiasClamp = 0.0f;
	}
	else
		rasterizer.depthBiasEnable = VK_FALSE;

	// todo - how does culling behave with rects?
	// right now we just assume that their winding is always CW
	if (isPrimitiveRect)
	{
		if (frontFace == Latte::LATTE_PA_SU_SC_MODE_CNTL::E_FRONTFACE::CW)
			cullFront = cullBack;
		else
			cullBack = cullFront;
	}

	if (cullFront && cullBack)
		rasterizer.cullMode = VK_CULL_MODE_FRONT_AND_BACK;
	else if (cullFront)
		rasterizer.cullMode = VK_CULL_MODE_FRONT_BIT;
	else if (cullBack)
		rasterizer.cullMode = VK_CULL_MODE_BACK_BIT;
	else
		rasterizer.cullMode = VK_CULL_MODE_NONE;

	if (frontFace == Latte::LATTE_PA_SU_SC_MODE_CNTL::E_FRONTFACE::CCW)
		rasterizer.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
	else
		rasterizer.frontFace = VK_FRONT_FACE_CLOCKWISE;

	// multisampling
	multisampling.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
	multisampling.sampleShadingEnable = VK_FALSE;
	multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
}

// Fills the per-attachment blend state for every entry of colorBlendAttachments and the
// global color blend state (logic op). usesBlendConstants is set to true if any attachment
// uses a blend factor that reads the blend constants.
void PipelineCompiler::InitBlendState(const LatteContextRegister& latteRegister, PipelineInfo* pipelineInfo, bool& usesBlendConstants)
{
	const Latte::LATTE_CB_COLOR_CONTROL& colorControlReg = latteRegister.CB_COLOR_CONTROL;
	uint32 blendEnableMask = colorControlReg.get_BLEND_MASK();
	uint32 renderTargetMask = latteRegister.CB_TARGET_MASK.get_MASK();

	usesBlendConstants = false;

	for (size_t i = 0; i < colorBlendAttachments.size(); i++)
	{
		auto& entry = colorBlendAttachments[i];
		entry.blendEnable = ((blendEnableMask & (1 << i)) != 0) ? VK_TRUE : VK_FALSE;

		const auto& blendControlReg = latteRegister.CB_BLENDN_CONTROL[i];

		// 4 write mask bits per render target
		entry.colorWriteMask = (renderTargetMask >> (i * 4)) & 0xF;

		entry.colorBlendOp = GetVkBlendOp(blendControlReg.get_COLOR_COMB_FCN());
		entry.srcColorBlendFactor = GetVkBlendFactor(blendControlReg.get_COLOR_SRCBLEND());
		entry.dstColorBlendFactor = GetVkBlendFactor(blendControlReg.get_COLOR_DSTBLEND());
		if (blendControlReg.get_SEPARATE_ALPHA_BLEND())
		{
			entry.alphaBlendOp = GetVkBlendOp(blendControlReg.get_ALPHA_COMB_FCN());
			entry.srcAlphaBlendFactor = GetVkBlendFactor(blendControlReg.get_ALPHA_SRCBLEND());
			entry.dstAlphaBlendFactor = GetVkBlendFactor(blendControlReg.get_ALPHA_DSTBLEND());
		}
		else
		{
			// without separate alpha blending the alpha channel reuses the color settings
			entry.alphaBlendOp = entry.colorBlendOp;
			entry.srcAlphaBlendFactor = entry.srcColorBlendFactor;
			entry.dstAlphaBlendFactor = entry.dstColorBlendFactor;
		}

		usesBlendConstants |= ConsumesBlendConstants(entry.srcColorBlendFactor);
		usesBlendConstants |= ConsumesBlendConstants(entry.dstColorBlendFactor);
		usesBlendConstants |= ConsumesBlendConstants(entry.srcAlphaBlendFactor);
		usesBlendConstants |= ConsumesBlendConstants(entry.dstAlphaBlendFactor);
	}

	// setup VkPipelineColorBlendStateCreateInfo
	colorBlending.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
	const auto logicOp = colorControlReg.get_ROP();
	if (logicOp == Latte::LATTE_CB_COLOR_CONTROL::E_LOGICOP::COPY)
	{
		colorBlending.logicOpEnable = VK_FALSE;
		colorBlending.logicOp = VK_LOGIC_OP_COPY;
	}
	else
	{
		colorBlending.logicOpEnable = VK_TRUE;
		switch (logicOp)
		{
		case Latte::LATTE_CB_COLOR_CONTROL::E_LOGICOP::SET:
			colorBlending.logicOp = VK_LOGIC_OP_SET;
			break;
		case Latte::LATTE_CB_COLOR_CONTROL::E_LOGICOP::CLEAR:
			colorBlending.logicOp = VK_LOGIC_OP_CLEAR;
			break;
		case Latte::LATTE_CB_COLOR_CONTROL::E_LOGICOP::OR:
			colorBlending.logicOp = VK_LOGIC_OP_OR;
			break;
		default:
			colorBlending.logicOp = VK_LOGIC_OP_COPY;
			cemu_assert_unimplemented();
		}
	}
	colorBlending.attachmentCount = colorBlendAttachments.size();
	colorBlending.pAttachments = colorBlendAttachments.data();
	// we use VK_DYNAMIC_STATE_BLEND_CONSTANTS, the blend constants here don't matter
	colorBlending.blendConstants[0] = 0;
	colorBlending.blendConstants[1] = 0;
	colorBlending.blendConstants[2] = 0;
	colorBlending.blendConstants[3] = 0;
}

// Creates the descriptor set layouts in the fixed order vertex (0), pixel (1), geometry (2)
// and stores them both in descriptorSetLayout[] and on the pipeline object.
// If a geometry shader is present without a pixel shader, an empty placeholder layout is
// created for slot 1 so that the geometry set still ends up at index 2.
void PipelineCompiler::InitDescriptorSetLayouts(VulkanRenderer* vkRenderer, PipelineInfo* vkrPipelineInfo, LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader)
{
	auto vkObjPipeline = vkrPipelineInfo->m_vkrObjPipeline;

	if (vertexShader)
	{
		cemu_assert_debug(descriptorSetLayoutCount == 0);
		CreateDescriptorSetLayout(vkRenderer, vertexShader, descriptorSetLayout[descriptorSetLayoutCount], vkrPipelineInfo);
		vkObjPipeline->vertexDSL = descriptorSetLayout[descriptorSetLayoutCount];
		descriptorSetLayoutCount++;
	}

	if (pixelShader)
	{
		cemu_assert_debug(descriptorSetLayoutCount == 1);
		CreateDescriptorSetLayout(vkRenderer, pixelShader, descriptorSetLayout[descriptorSetLayoutCount], vkrPipelineInfo);
		vkObjPipeline->pixelDSL = descriptorSetLayout[descriptorSetLayoutCount];
		descriptorSetLayoutCount++;
	}
	else if (geometryShader)
	{
		// if no pixel shader is present, create empty placeholder descriptor set layout (geometry shader set must be at index 2)
		VkDescriptorSetLayoutCreateInfo layoutInfo = {};
		layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
		layoutInfo.bindingCount = 0;
		layoutInfo.pBindings = nullptr;

		if (vkCreateDescriptorSetLayout(vkRenderer->m_logicalDevice, &layoutInfo, nullptr, &descriptorSetLayout[descriptorSetLayoutCount]) != VK_SUCCESS)
			vkRenderer->UnrecoverableError(fmt::format("Failed to create placeholder descriptor set layout for shader {0:#x}", geometryShader->baseHash).c_str());
		descriptorSetLayoutCount++;
	}

	if (geometryShader)
	{
		cemu_assert_debug(descriptorSetLayoutCount == 2);
		CreateDescriptorSetLayout(vkRenderer, geometryShader, descriptorSetLayout[descriptorSetLayoutCount], vkrPipelineInfo);
		vkObjPipeline->geometryDSL = descriptorSetLayout[descriptorSetLayoutCount];
		descriptorSetLayoutCount++;
	}
}

// Fills depthStencilState from the depth/stencil registers of LatteGPUState.contextNew.
// When back-face stencil is disabled, the back face reuses the front-face settings.
void PipelineCompiler::InitDepthStencilState()
{
	// get depth control parameters
	bool depthEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_ENABLE();
	auto depthFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_FUNC();
	bool depthWriteEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_Z_WRITE_ENABLE();

	// setup VkPipelineDepthStencilStateCreateInfo
	depthStencilState.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
	depthStencilState.depthTestEnable = depthEnable ? VK_TRUE : VK_FALSE;
	depthStencilState.depthWriteEnable = depthWriteEnable ? VK_TRUE : VK_FALSE;

	// indexed by the register's compare function value (used for depth and stencil)
	static const VkCompareOp vkDepthCompareTable[8] =
	{
		VK_COMPARE_OP_NEVER,
		VK_COMPARE_OP_LESS,
		VK_COMPARE_OP_EQUAL,
		VK_COMPARE_OP_LESS_OR_EQUAL,
		VK_COMPARE_OP_GREATER,
		VK_COMPARE_OP_NOT_EQUAL,
		VK_COMPARE_OP_GREATER_OR_EQUAL,
		VK_COMPARE_OP_ALWAYS
	};

	depthStencilState.depthCompareOp = vkDepthCompareTable[(size_t)depthFunc];

	depthStencilState.depthBoundsTestEnable = false; // todo
	depthStencilState.minDepthBounds = 0.0f;
	depthStencilState.maxDepthBounds = 1.0f;

	// get stencil control parameters
	bool stencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ENABLE();
	bool backStencilEnable = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_BACK_STENCIL_ENABLE();
	auto frontStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_F();
	auto frontStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_F();
	auto frontStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_F();
	auto frontStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_F();
	auto backStencilFunc = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FUNC_B();
	auto backStencilZPass = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZPASS_B();
	auto backStencilZFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_ZFAIL_B();
	auto backStencilFail = LatteGPUState.contextNew.DB_DEPTH_CONTROL.get_STENCIL_FAIL_B();
	// get stencil reference and mask parameters
	uint32 stencilCompareMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILMASK_F();
	uint32 stencilWriteMaskFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILWRITEMASK_F();
	uint32 stencilRefFront = LatteGPUState.contextNew.DB_STENCILREFMASK.get_STENCILREF_F();
	uint32 stencilCompareMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILMASK_B();
	uint32 stencilWriteMaskBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILWRITEMASK_B();
	uint32 stencilRefBack = LatteGPUState.contextNew.DB_STENCILREFMASK_BF.get_STENCILREF_B();

	// indexed by the register's stencil op value
	static const VkStencilOp stencilOpTable[8] =
	{
		VK_STENCIL_OP_KEEP,
		VK_STENCIL_OP_ZERO,
		VK_STENCIL_OP_REPLACE,
		VK_STENCIL_OP_INCREMENT_AND_CLAMP,
		VK_STENCIL_OP_DECREMENT_AND_CLAMP,
		VK_STENCIL_OP_INVERT,
		VK_STENCIL_OP_INCREMENT_AND_WRAP,
		VK_STENCIL_OP_DECREMENT_AND_WRAP
	};

	depthStencilState.stencilTestEnable = stencilEnable ? VK_TRUE : VK_FALSE;

	depthStencilState.front.reference = stencilRefFront;
	depthStencilState.front.compareMask = stencilCompareMaskFront;
	// the front face uses the front write mask (this previously assigned the back-face mask)
	depthStencilState.front.writeMask = stencilWriteMaskFront;
	depthStencilState.front.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc];
	depthStencilState.front.depthFailOp = stencilOpTable[(size_t)frontStencilZFail];
	depthStencilState.front.failOp = stencilOpTable[(size_t)frontStencilFail];
	depthStencilState.front.passOp = stencilOpTable[(size_t)frontStencilZPass];

	if (backStencilEnable)
	{
		depthStencilState.back.reference = stencilRefBack;
		depthStencilState.back.compareMask = stencilCompareMaskBack;
		depthStencilState.back.writeMask = stencilWriteMaskBack;
		depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)backStencilFunc];
		depthStencilState.back.depthFailOp = stencilOpTable[(size_t)backStencilZFail];
		depthStencilState.back.failOp = stencilOpTable[(size_t)backStencilFail];
		depthStencilState.back.passOp = stencilOpTable[(size_t)backStencilZPass];
	}
	else
	{
		// back-face stencil disabled: mirror the front-face state
		depthStencilState.back.reference = stencilRefFront;
		depthStencilState.back.compareMask = stencilCompareMaskFront;
		depthStencilState.back.writeMask = stencilWriteMaskFront;
		depthStencilState.back.compareOp = vkDepthCompareTable[(size_t)frontStencilFunc];
		depthStencilState.back.depthFailOp = stencilOpTable[(size_t)frontStencilZFail];
		depthStencilState.back.failOp = stencilOpTable[(size_t)frontStencilFail];
		depthStencilState.back.passOp = stencilOpTable[(size_t)frontStencilZPass];
	}
}

// Registers blend constants and/or depth bias as dynamic state when the pipeline uses
// them, and mirrors that information into pipelineInfo.
void PipelineCompiler::InitDynamicState(PipelineInfo* pipelineInfo, bool usesBlendConstants, bool usesDepthBias)
{
	if (usesBlendConstants)
	{
		dynamicStates.emplace_back(VK_DYNAMIC_STATE_BLEND_CONSTANTS);
		pipelineInfo->usesBlendConstants = true;
	}
	if (usesDepthBias)
	{
		dynamicStates.emplace_back(VK_DYNAMIC_STATE_DEPTH_BIAS);
		pipelineInfo->usesDepthBias = true;
	}

	dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
	dynamicState.dynamicStateCount = dynamicStates.size();
	dynamicState.pDynamicStates = dynamicStates.data();
}

// Captures everything needed to compile the pipeline later: shaders, fixed-function state,
// descriptor set layouts and the pipeline layout.
// Returns false if the pipeline layout could not be created, true otherwise.
bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj)
{
	VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance();

	// ##########################################################################################################################################

	const auto primitiveMode = latteRegister.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
	const bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS);

	m_fetchShader = pipelineInfo->fetchShader;
	m_vkVertexShader = pipelineInfo->vertexShaderVk;
	m_vkPixelShader = pipelineInfo->pixelShaderVk;
	m_vkGeometryShader = pipelineInfo->geometryShaderVk;
	m_vkrObjPipeline = pipelineInfo->m_vkrObjPipeline;
	m_renderPassObj = renderPassObj;
	// increment ref counter for vkrObjPipeline and renderpass object to make sure they dont get released while we are using them
	// this is done as soon as the pointers are stored so that the unconditional decRef in the destructor stays balanced on the failure path below
	m_vkrObjPipeline->incRef();
	renderPassObj->incRef();

	// if required generate RECT emulation geometry shader
	if (!vkRenderer->m_featureControl.deviceExtensions.nv_fill_rectangle && isPrimitiveRect)
	{
		cemu_assert(m_vkGeometryShader == nullptr); // todo - handle cases where the game already provides a GS
		m_rectEmulationGS = rectsEmulationGS_generate(pipelineInfo->vertexShader, latteRegister);
		pipelineInfo->rectEmulationGS = m_rectEmulationGS;
	}

	// ##########################################################################################################################################

	pipelineInfo->primitiveMode = primitiveMode;

	InitVertexInputState(latteRegister, pipelineInfo->vertexShader, pipelineInfo->fetchShader);
	InitInputAssemblyState(primitiveMode);
	InitViewportState();

	bool usesDepthBias = false;
	InitRasterizerState(latteRegister, vkRenderer, isPrimitiveRect, usesDepthBias);

	bool usesBlendConstants = false;
	InitBlendState(latteRegister, pipelineInfo, usesBlendConstants);

	InitDescriptorSetLayouts(vkRenderer, pipelineInfo, pipelineInfo->vertexShader, pipelineInfo->pixelShader, pipelineInfo->geometryShader);

	// ##########################################################################################################################################

	VkPipelineLayoutCreateInfo pipelineLayoutInfo{};
	pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
	pipelineLayoutInfo.setLayoutCount = descriptorSetLayoutCount;
	pipelineLayoutInfo.pSetLayouts = descriptorSetLayout;
	pipelineLayoutInfo.pPushConstantRanges = nullptr;
	pipelineLayoutInfo.pushConstantRangeCount = 0;

	VkResult result = vkCreatePipelineLayout(vkRenderer->m_logicalDevice, &pipelineLayoutInfo, nullptr, &m_pipeline_layout);
	if (result != VK_SUCCESS)
	{
		forceLog_printf("%s", fmt::format("Failed to create pipeline layout: {}", result).c_str());
		// s_nvidiaWorkaround is never locked in this function, so it must not be unlocked here
		return false;
	}

	// ###################################################

	InitDepthStencilState();

	// ##########################################################################################################################################

	InitDynamicState(pipelineInfo, usesBlendConstants, usesDepthBias);

	// ##########################################################################################################################################

	pipelineInfo->m_vkrObjPipeline->pipeline_layout = m_pipeline_layout;

	return true;
}

bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool showInOverlay)
{
	VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance();

	if (!vkRenderer->m_featureControl.deviceExtensions.pipeline_creation_cache_control)
		forceCompile = true; // if VK_EXT_pipeline_creation_cache_control is not supported we always force synchronous compilation

	if (!forceCompile)
	{
		// fail early if some shader stages are not compiled
		if (m_vkVertexShader && m_vkVertexShader->IsCompiled() == false)
			return false;
		if (m_vkPixelShader && m_vkPixelShader->IsCompiled() == false)
			return false;
		if (m_vkGeometryShader && m_vkGeometryShader->IsCompiled() == false)
			return false;
	}
	else
	{
		// if some shader stages are not compiled yet, compile them now
		if (m_vkVertexShader && m_vkVertexShader->IsCompiled() == false)
			m_vkVertexShader->PreponeCompilation(isRenderThread);
		if (m_vkPixelShader && m_vkPixelShader->IsCompiled() == false)
			m_vkPixelShader->PreponeCompilation(isRenderThread);
		if (m_vkGeometryShader && m_vkGeometryShader->IsCompiled() == false)
			m_vkGeometryShader->PreponeCompilation(isRenderThread);
	}

	if (shaderStages.empty())
	{
		if (!InitShaderStages(vkRenderer, m_vkVertexShader, m_vkPixelShader, m_vkGeometryShader))
			return true; // invalid shaders, cannot compile
	}

	VkGraphicsPipelineCreateInfo pipelineInfo{};
	pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
	pipelineInfo.stageCount = shaderStages.size();
	pipelineInfo.pStages = shaderStages.data();
	pipelineInfo.pVertexInputState = &vertexInputInfo;
	pipelineInfo.pInputAssemblyState = &inputAssembly;
pipelineInfo.pViewportState = &viewportState; pipelineInfo.pDynamicState = &dynamicState; pipelineInfo.pRasterizationState = &rasterizer; pipelineInfo.pMultisampleState = &multisampling; pipelineInfo.pColorBlendState = &colorBlending; pipelineInfo.layout = m_pipeline_layout; pipelineInfo.renderPass = m_renderPassObj->m_renderPass; pipelineInfo.pDepthStencilState = &depthStencilState; pipelineInfo.subpass = 0; pipelineInfo.basePipelineHandle = nullptr; pipelineInfo.flags = 0; if (!forceCompile) pipelineInfo.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; VkPipelineCreationFeedbackCreateInfoEXT creationFeedbackInfo; VkPipelineCreationFeedbackEXT creationFeedback; std::vector creationStageFeedback(0); if (vkRenderer->m_featureControl.deviceExtensions.pipeline_feedback) { creationFeedback = {}; creationFeedback.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT; creationStageFeedback.reserve(pipelineInfo.stageCount); for (uint32_t i = 0; i < pipelineInfo.stageCount; ++i) creationStageFeedback.data()[i] = { VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT, 0 }; creationFeedbackInfo = {}; creationFeedbackInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT; creationFeedbackInfo.pPipelineCreationFeedback = &creationFeedback; creationFeedbackInfo.pPipelineStageCreationFeedbacks = creationStageFeedback.data(); creationFeedbackInfo.pipelineStageCreationFeedbackCount = pipelineInfo.stageCount; pipelineInfo.pNext = &creationFeedbackInfo; } VkPipeline pipeline = VK_NULL_HANDLE; VkResult result; uint8 retryCount = 0; while (retryCount < 3) { std::shared_lock lock(vkRenderer->m_pipeline_cache_save_mutex); result = vkCreateGraphicsPipelines(vkRenderer->m_logicalDevice, vkRenderer->m_pipeline_cache, 1, &pipelineInfo, nullptr, &pipeline); lock.unlock(); if (result != VK_ERROR_OUT_OF_DEVICE_MEMORY) break; retryCount++; } if (result == VK_ERROR_PIPELINE_COMPILE_REQUIRED_EXT) { return false; } else if (result == VK_SUCCESS) { 
m_vkrObjPipeline->setPipeline(pipeline); } else { forceLog_printf("Failed to create graphics pipeline. Error %d", (sint32)result); cemu_assert_debug(false); return true; // true indicates that caller should no longer attempt to compile this pipeline again } vkRenderer->m_pipeline_cache_semaphore.notify(); if (vkRenderer->m_featureControl.deviceExtensions.pipeline_feedback) { if (HAS_FLAG(creationFeedback.flags, VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT)) { bool hasCacheHit = HAS_FLAG(creationFeedback.flags, VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT); if (!hasCacheHit) { if (showInOverlay) { if (isRenderThread) g_compiling_pipelines_syncTimeSum += creationFeedback.duration; else g_compiling_pipelines_async++; g_compiling_pipelines++; } } } } return true; } void PipelineCompiler::TrackAsCached(uint64 baseHash, uint64 pipelineStateHash) { auto& pipelineCache = VulkanPipelineStableCache::GetInstance(); if (pipelineCache.HasPipelineCached(baseHash, pipelineStateHash)) return; pipelineCache.AddCurrentStateToCache(baseHash, pipelineStateHash); }