From 61484598fc21fb127d8499691b72309942ee5e97 Mon Sep 17 00:00:00 2001 From: Exzap <13877693+Exzap@users.noreply.github.com> Date: Sat, 10 May 2025 09:48:35 +0200 Subject: [PATCH] Vulkan: Use per-pipeline buffer robustness And if the extension is not supported then fallback to enabling robust buffer access for all shaders. --- .../Vulkan/VulkanPipelineCompiler.cpp | 51 ++++++++++++++++++- .../Renderer/Vulkan/VulkanPipelineCompiler.h | 5 +- .../Vulkan/VulkanPipelineStableCache.cpp | 8 +-- .../Latte/Renderer/Vulkan/VulkanRenderer.cpp | 40 +++++++++++++-- .../HW/Latte/Renderer/Vulkan/VulkanRenderer.h | 1 + .../Renderer/Vulkan/VulkanRendererCore.cpp | 3 +- 6 files changed, 96 insertions(+), 12 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp index 1ea522dc..7555c03a 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp @@ -873,7 +873,7 @@ void PipelineCompiler::InitDynamicState(PipelineInfo* pipelineInfo, bool usesBle dynamicState.pDynamicStates = dynamicStates.data(); } -bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj) +bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj, bool requireRobustBufferAccess) { VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance(); @@ -888,6 +888,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const m_vkGeometryShader = pipelineInfo->geometryShaderVk; m_vkrObjPipeline = pipelineInfo->m_vkrObjPipeline; m_renderPassObj = renderPassObj; + m_requestRobustBufferAccess = requireRobustBufferAccess; // if required generate RECT emulation geometry shader if (!vkRenderer->m_featureControl.deviceExtensions.nv_fill_rectangle && 
isPrimitiveRect) @@ -998,6 +999,8 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show if (!forceCompile) pipelineInfo.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT; + void* prevStruct = nullptr; + VkPipelineCreationFeedbackCreateInfoEXT creationFeedbackInfo; VkPipelineCreationFeedbackEXT creationFeedback; std::vector creationStageFeedback(0); @@ -1015,9 +1018,25 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show creationFeedbackInfo.pPipelineCreationFeedback = &creationFeedback; creationFeedbackInfo.pPipelineStageCreationFeedbacks = creationStageFeedback.data(); creationFeedbackInfo.pipelineStageCreationFeedbackCount = pipelineInfo.stageCount; - pipelineInfo.pNext = &creationFeedbackInfo; + creationFeedbackInfo.pNext = prevStruct; + prevStruct = &creationFeedbackInfo; } + VkPipelineRobustnessCreateInfoEXT pipelineRobustnessCreateInfo{}; + if (vkRenderer->m_featureControl.deviceExtensions.pipeline_robustness && m_requestRobustBufferAccess) + { + // per-pipeline handling of robust buffer access, if the extension is not available then we fall back to device feature robustBufferAccess + pipelineRobustnessCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_ROBUSTNESS_CREATE_INFO_EXT; + pipelineRobustnessCreateInfo.pNext = prevStruct; + prevStruct = &pipelineRobustnessCreateInfo; + pipelineRobustnessCreateInfo.storageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT; + pipelineRobustnessCreateInfo.uniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT; + pipelineRobustnessCreateInfo.vertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT; + pipelineRobustnessCreateInfo.images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT; + } + + pipelineInfo.pNext = prevStruct; + VkPipeline pipeline = VK_NULL_HANDLE; VkResult result; uint8 retryCount = 0; @@ -1075,3 +1094,31 @@ void PipelineCompiler::TrackAsCached(uint64 
baseHash, uint64 pipelineStateHash) return; pipelineCache.AddCurrentStateToCache(baseHash, pipelineStateHash); } + +// returns true if a pipeline built from the given shaders requires robust buffer access (null shaders are skipped) +// robust buffer access must be enabled whenever a shader could potentially do out-of-bounds reads or writes +// this can happen when: +// - Streamout is used with a buffer that is too small (unconfirmed; the cause could also be how the streamout array index is calculated, which we can maybe fix in the future) +// - The shader uses dynamic indices for uniform access. This will trigger the uniform mode to be FULL_CBANK +bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader) +{ + bool requiresRobustBufferAccess = false; + if (vertexShader) + { + cemu_assert_debug(vertexShader->shaderType == LatteConst::ShaderType::Vertex); + requiresRobustBufferAccess |= vertexShader->hasStreamoutBufferWrite; + requiresRobustBufferAccess |= vertexShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK; + } + if (geometryShader) + { + cemu_assert_debug(geometryShader->shaderType == LatteConst::ShaderType::Geometry); + requiresRobustBufferAccess |= geometryShader->hasStreamoutBufferWrite; + requiresRobustBufferAccess |= geometryShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK; + } + if (pixelShader) + { + cemu_assert_debug(pixelShader->shaderType == LatteConst::ShaderType::Pixel); + requiresRobustBufferAccess |= pixelShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK; + } + return requiresRobustBufferAccess; +} diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h index 7879b932..7297049e 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h @@ -38,11 +38,14 @@ public: RendererShaderVk* 
m_vkPixelShader{}; RendererShaderVk* m_vkGeometryShader{}; - bool InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj); + bool InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj, bool requireRobustBufferAccess); void TrackAsCached(uint64 baseHash, uint64 pipelineStateHash); // stores pipeline to permanent cache if not yet cached. Must be called synchronously from render thread due to dependency on GPU state + static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader); + VkPipelineLayout m_pipelineLayout; VKRObjectRenderPass* m_renderPassObj{}; + bool m_requestRobustBufferAccess{false}; /* shader stages */ std::vector shaderStages; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.cpp index 123120d3..9f8f4491 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.cpp @@ -277,8 +277,9 @@ void VulkanPipelineStableCache::LoadPipelineFromCache(std::span fileData) m_pipelineIsCachedLock.unlock(); // compile { - PipelineCompiler pp; - if (!pp.InitFromCurrentGPUState(pipelineInfo, *lcr, renderPass)) + PipelineCompiler pipelineCompiler; + bool requiresRobustBufferAccess = PipelineCompiler::CalcRobustBufferAccessRequirement(vertexShader, pixelShader, geometryShader); + if (!pipelineCompiler.InitFromCurrentGPUState(pipelineInfo, *lcr, renderPass, requiresRobustBufferAccess)) { s_spinlockSharedInternal.lock(); delete lcr; @@ -286,8 +287,7 @@ void VulkanPipelineStableCache::LoadPipelineFromCache(std::span fileData) s_spinlockSharedInternal.unlock(); return; } - pp.Compile(true, true, false); - // destroy pp early + pipelineCompiler.Compile(true, true, 
false); } // on success, calculate pipeline hash and flag as present in cache uint64 pipelineBaseHash = vertexShader->baseHash; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index a88c3818..aed0db25 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -50,7 +50,8 @@ const std::vector kOptionalDeviceExtensions = VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, VK_KHR_PRESENT_WAIT_EXTENSION_NAME, VK_KHR_PRESENT_ID_EXTENSION_NAME, - VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME + VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME, + VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME }; const std::vector kRequiredDeviceExtensions = @@ -263,6 +264,14 @@ void VulkanRenderer::GetDeviceFeatures() pwf.pNext = prevStruct; prevStruct = &pwf; + VkPhysicalDevicePipelineRobustnessFeaturesEXT pprf{}; + if (m_featureControl.deviceExtensions.pipeline_robustness) + { + pprf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT; + pprf.pNext = prevStruct; + prevStruct = &pprf; + } + VkPhysicalDeviceFeatures2 physicalDeviceFeatures2{}; physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; physicalDeviceFeatures2.pNext = prevStruct; @@ -317,6 +326,11 @@ void VulkanRenderer::GetDeviceFeatures() { cemuLog_log(LogType::Force, "VK_EXT_depth_clip_enable not supported"); } + if (m_featureControl.deviceExtensions.pipeline_robustness) + { + if ( pprf.pipelineRobustness != VK_TRUE ) + m_featureControl.deviceExtensions.pipeline_robustness = false; + } // get limits m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(prop2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4); m_featureControl.limits.nonCoherentAtomSize = std::max(prop2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4); @@ -475,11 +489,17 @@ VulkanRenderer::VulkanRenderer() deviceFeatures.occlusionQueryPrecise = VK_TRUE; 
deviceFeatures.depthClamp = VK_TRUE; deviceFeatures.depthBiasClamp = VK_TRUE; - if (m_vendor == GfxVendor::AMD) + + if (m_featureControl.deviceExtensions.pipeline_robustness) { - deviceFeatures.robustBufferAccess = VK_TRUE; - cemuLog_log(LogType::Force, "Enable robust buffer access"); + deviceFeatures.robustBufferAccess = VK_FALSE; } + else + { + cemuLog_log(LogType::Force, "VK_EXT_pipeline_robustness not supported. Falling back to robustBufferAccess"); + deviceFeatures.robustBufferAccess = VK_TRUE; + } + if (m_featureControl.mode.useTFEmulationViaSSBO) { deviceFeatures.vertexPipelineStoresAndAtomics = true; @@ -524,6 +544,15 @@ VulkanRenderer::VulkanRenderer() deviceExtensionFeatures = &presentWaitFeature; presentWaitFeature.presentWait = VK_TRUE; } + // enable VK_EXT_pipeline_robustness + VkPhysicalDevicePipelineRobustnessFeaturesEXT pipelineRobustnessFeature{}; + if (m_featureControl.deviceExtensions.pipeline_robustness) + { + pipelineRobustnessFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT; + pipelineRobustnessFeature.pNext = deviceExtensionFeatures; + deviceExtensionFeatures = &pipelineRobustnessFeature; + pipelineRobustnessFeature.pipelineRobustness = VK_TRUE; + } std::vector used_extensions; VkDeviceCreateInfo createInfo = CreateDeviceCreateInfo(queueCreateInfos, deviceFeatures, deviceExtensionFeatures, used_extensions); @@ -1127,6 +1156,8 @@ VkDeviceCreateInfo VulkanRenderer::CreateDeviceCreateInfo(const std::vectorInitFromCurrentGPUState(pipelineInfo, LatteGPUState.contextNew, vkFBO->GetRenderPassObj()); + bool requiresRobustBufferAccess = PipelineCompiler::CalcRobustBufferAccessRequirement(vertexShader, pixelShader, geometryShader); + pipelineCompiler->InitFromCurrentGPUState(pipelineInfo, LatteGPUState.contextNew, vkFBO->GetRenderPassObj(), requiresRobustBufferAccess); pipelineCompiler->TrackAsCached(vsBaseHash, pipelineHash); // use heuristics based on parameter patterns to determine if the current drawcall is 
essential (non-skipable)