Vulkan: Use per-pipeline buffer robustness

If the extension is not supported, fall back to enabling robust buffer access for all shaders.
Exzap 2025-05-10 09:48:35 +02:00
parent 081ebead5f
commit 61484598fc
6 changed files with 96 additions and 12 deletions
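
For context, the device-creation half of this change: when VK_EXT_pipeline_robustness is available, the device-wide robustBufferAccess feature is left off and robustness is requested per pipeline instead; otherwise every shader pays for bounds checking. A minimal sketch of that decision (standalone illustration, not Cemu's code; extPipelineRobustnessSupported is a hypothetical flag set from the extension/feature query in the GetDeviceFeatures() hunks below):

// Sketch: choose between per-pipeline robustness and the device-wide fallback.
VkPhysicalDeviceFeatures deviceFeatures{};
VkPhysicalDevicePipelineRobustnessFeaturesEXT pipelineRobustnessFeature{};
void* deviceCreatePNext = nullptr; // head of the VkDeviceCreateInfo pNext chain
if (extPipelineRobustnessSupported)
{
	deviceFeatures.robustBufferAccess = VK_FALSE; // opt in per pipeline instead
	pipelineRobustnessFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;
	pipelineRobustnessFeature.pipelineRobustness = VK_TRUE;
	pipelineRobustnessFeature.pNext = deviceCreatePNext;
	deviceCreatePNext = &pipelineRobustnessFeature;
}
else
{
	deviceFeatures.robustBufferAccess = VK_TRUE; // fallback: robust access for all shaders
}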

View file

@@ -873,7 +873,7 @@ void PipelineCompiler::InitDynamicState(PipelineInfo* pipelineInfo, bool usesBle
dynamicState.pDynamicStates = dynamicStates.data();
}
bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj)
bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj, bool requireRobustBufferAccess)
{
VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance();
@@ -888,6 +888,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
m_vkGeometryShader = pipelineInfo->geometryShaderVk;
m_vkrObjPipeline = pipelineInfo->m_vkrObjPipeline;
m_renderPassObj = renderPassObj;
m_requestRobustBufferAccess = requireRobustBufferAccess;
// if required generate RECT emulation geometry shader
if (!vkRenderer->m_featureControl.deviceExtensions.nv_fill_rectangle && isPrimitiveRect)
@@ -998,6 +999,8 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show
if (!forceCompile)
pipelineInfo.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT;
void* prevStruct = nullptr;
VkPipelineCreationFeedbackCreateInfoEXT creationFeedbackInfo;
VkPipelineCreationFeedbackEXT creationFeedback;
std::vector<VkPipelineCreationFeedbackEXT> creationStageFeedback(0);
@@ -1015,9 +1018,25 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show
creationFeedbackInfo.pPipelineCreationFeedback = &creationFeedback;
creationFeedbackInfo.pPipelineStageCreationFeedbacks = creationStageFeedback.data();
creationFeedbackInfo.pipelineStageCreationFeedbackCount = pipelineInfo.stageCount;
pipelineInfo.pNext = &creationFeedbackInfo;
creationFeedbackInfo.pNext = prevStruct;
prevStruct = &creationFeedbackInfo;
}
VkPipelineRobustnessCreateInfoEXT pipelineRobustnessCreateInfo{};
if (vkRenderer->m_featureControl.deviceExtensions.pipeline_robustness && m_requestRobustBufferAccess)
{
// per-pipeline handling of robust buffer access; if the extension is not available we fall back to the device feature robustBufferAccess
pipelineRobustnessCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_ROBUSTNESS_CREATE_INFO_EXT;
pipelineRobustnessCreateInfo.pNext = prevStruct;
prevStruct = &pipelineRobustnessCreateInfo;
pipelineRobustnessCreateInfo.storageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
pipelineRobustnessCreateInfo.uniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
pipelineRobustnessCreateInfo.vertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT;
pipelineRobustnessCreateInfo.images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT;
}
pipelineInfo.pNext = prevStruct;
VkPipeline pipeline = VK_NULL_HANDLE;
VkResult result;
uint8 retryCount = 0;
@@ -1075,3 +1094,31 @@ void PipelineCompiler::TrackAsCached(uint64 baseHash, uint64 pipelineStateHash)
return;
pipelineCache.AddCurrentStateToCache(baseHash, pipelineStateHash);
}
// calculate whether the pipeline requires robust buffer access
// if there is a potential risk of a shader doing out-of-bounds reads or writes, we need to enable robust buffer access
// this can happen when:
// - Streamout is used with a buffer that is too small (probably? Could also be an issue with how the streamout array index is calculated -> We can maybe fix this in the future)
// - The shader uses dynamic indices for uniform access. This triggers uniform mode FULL_CBANK
bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader)
{
bool requiresRobustBufferAccess = false;
if (vertexShader)
{
cemu_assert_debug(vertexShader->shaderType == LatteConst::ShaderType::Vertex);
requiresRobustBufferAccess |= vertexShader->hasStreamoutBufferWrite;
requiresRobustBufferAccess |= vertexShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
}
if (geometryShader)
{
cemu_assert_debug(geometryShader->shaderType == LatteConst::ShaderType::Geometry);
requiresRobustBufferAccess |= geometryShader->hasStreamoutBufferWrite;
requiresRobustBufferAccess |= geometryShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
}
if (pixelShader)
{
cemu_assert_debug(pixelShader->shaderType == LatteConst::ShaderType::Pixel);
requiresRobustBufferAccess |= pixelShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
}
return requiresRobustBufferAccess;
}
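
For reference, the per-pipeline half of the mechanism as a reduced, self-contained sketch (hypothetical snippet, not Cemu's code): the robustness struct is linked into the pipeline's pNext chain only when CalcRobustBufferAccessRequirement() reported a risk, so pipelines that do not need bounds checking keep the driver defaults.

// Sketch: attach VkPipelineRobustnessCreateInfoEXT to a single pipeline.
// 'supportsPipelineRobustness' and 'needsRobustAccess' are assumed inputs.
void ChainPipelineRobustness(VkGraphicsPipelineCreateInfo& pipelineInfo,
	VkPipelineRobustnessCreateInfoEXT& robustnessInfo,
	bool supportsPipelineRobustness, bool needsRobustAccess)
{
	if (!supportsPipelineRobustness || !needsRobustAccess)
		return; // without the extension, the device-wide robustBufferAccess fallback applies
	robustnessInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_ROBUSTNESS_CREATE_INFO_EXT;
	// bounds-check buffer loads/stores; keep driver defaults for vertex input and images
	robustnessInfo.storageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
	robustnessInfo.uniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
	robustnessInfo.vertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT;
	robustnessInfo.images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT;
	// prepend to the existing pNext chain, as Compile() does with prevStruct
	robustnessInfo.pNext = pipelineInfo.pNext;
	pipelineInfo.pNext = &robustnessInfo;
}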

View file

@@ -38,11 +38,14 @@ public:
RendererShaderVk* m_vkPixelShader{};
RendererShaderVk* m_vkGeometryShader{};
bool InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj);
bool InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj, bool requireRobustBufferAccess);
void TrackAsCached(uint64 baseHash, uint64 pipelineStateHash); // stores pipeline to permanent cache if not yet cached. Must be called synchronously from render thread due to dependency on GPU state
static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
VkPipelineLayout m_pipelineLayout;
VKRObjectRenderPass* m_renderPassObj{};
bool m_requestRobustBufferAccess{false};
/* shader stages */
std::vector<VkPipelineShaderStageCreateInfo> shaderStages;

View file

@@ -277,8 +277,9 @@ void VulkanPipelineStableCache::LoadPipelineFromCache(std::span<uint8> fileData)
m_pipelineIsCachedLock.unlock();
// compile
{
PipelineCompiler pp;
if (!pp.InitFromCurrentGPUState(pipelineInfo, *lcr, renderPass))
PipelineCompiler pipelineCompiler;
bool requiresRobustBufferAccess = PipelineCompiler::CalcRobustBufferAccessRequirement(vertexShader, pixelShader, geometryShader);
if (!pipelineCompiler.InitFromCurrentGPUState(pipelineInfo, *lcr, renderPass, requiresRobustBufferAccess))
{
s_spinlockSharedInternal.lock();
delete lcr;
@@ -286,8 +287,7 @@ void VulkanPipelineStableCache::LoadPipelineFromCache(std::span<uint8> fileData)
s_spinlockSharedInternal.unlock();
return;
}
pp.Compile(true, true, false);
// destroy pp early
pipelineCompiler.Compile(true, true, false);
}
// on success, calculate pipeline hash and flag as present in cache
uint64 pipelineBaseHash = vertexShader->baseHash;

View file

@@ -50,7 +50,8 @@ const std::vector<const char*> kOptionalDeviceExtensions =
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
VK_KHR_PRESENT_WAIT_EXTENSION_NAME,
VK_KHR_PRESENT_ID_EXTENSION_NAME,
VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME
VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME
};
const std::vector<const char*> kRequiredDeviceExtensions =
@@ -263,6 +264,14 @@ void VulkanRenderer::GetDeviceFeatures()
pwf.pNext = prevStruct;
prevStruct = &pwf;
VkPhysicalDevicePipelineRobustnessFeaturesEXT pprf{};
if (m_featureControl.deviceExtensions.pipeline_robustness)
{
pprf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;
pprf.pNext = prevStruct;
prevStruct = &pprf;
}
VkPhysicalDeviceFeatures2 physicalDeviceFeatures2{};
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
physicalDeviceFeatures2.pNext = prevStruct;
@@ -317,6 +326,11 @@ void VulkanRenderer::GetDeviceFeatures()
{
cemuLog_log(LogType::Force, "VK_EXT_depth_clip_enable not supported");
}
if (m_featureControl.deviceExtensions.pipeline_robustness)
{
if (pprf.pipelineRobustness != VK_TRUE)
m_featureControl.deviceExtensions.pipeline_robustness = false;
}
// get limits
m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(prop2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4);
m_featureControl.limits.nonCoherentAtomSize = std::max(prop2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4);
@@ -475,11 +489,17 @@ VulkanRenderer::VulkanRenderer()
deviceFeatures.occlusionQueryPrecise = VK_TRUE;
deviceFeatures.depthClamp = VK_TRUE;
deviceFeatures.depthBiasClamp = VK_TRUE;
if (m_vendor == GfxVendor::AMD)
if (m_featureControl.deviceExtensions.pipeline_robustness)
{
deviceFeatures.robustBufferAccess = VK_TRUE;
cemuLog_log(LogType::Force, "Enable robust buffer access");
deviceFeatures.robustBufferAccess = VK_FALSE;
}
else
{
cemuLog_log(LogType::Force, "VK_EXT_pipeline_robustness not supported. Falling back to robustBufferAccess");
deviceFeatures.robustBufferAccess = VK_TRUE;
}
if (m_featureControl.mode.useTFEmulationViaSSBO)
{
deviceFeatures.vertexPipelineStoresAndAtomics = true;
@@ -524,6 +544,15 @@ VulkanRenderer::VulkanRenderer()
deviceExtensionFeatures = &presentWaitFeature;
presentWaitFeature.presentWait = VK_TRUE;
}
// enable VK_EXT_pipeline_robustness
VkPhysicalDevicePipelineRobustnessFeaturesEXT pipelineRobustnessFeature{};
if (m_featureControl.deviceExtensions.pipeline_robustness)
{
pipelineRobustnessFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;
pipelineRobustnessFeature.pNext = deviceExtensionFeatures;
deviceExtensionFeatures = &pipelineRobustnessFeature;
pipelineRobustnessFeature.pipelineRobustness = VK_TRUE;
}
std::vector<const char*> used_extensions;
VkDeviceCreateInfo createInfo = CreateDeviceCreateInfo(queueCreateInfos, deviceFeatures, deviceExtensionFeatures, used_extensions);
@@ -1127,6 +1156,8 @@ VkDeviceCreateInfo VulkanRenderer::CreateDeviceCreateInfo(const std::vector<VkDe
used_extensions.emplace_back(VK_KHR_PRESENT_ID_EXTENSION_NAME);
used_extensions.emplace_back(VK_KHR_PRESENT_WAIT_EXTENSION_NAME);
}
if (m_featureControl.deviceExtensions.pipeline_robustness)
used_extensions.emplace_back(VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME);
VkDeviceCreateInfo createInfo{};
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
@@ -1224,6 +1255,7 @@ bool VulkanRenderer::CheckDeviceExtensionSupport(const VkPhysicalDevice device,
info.deviceExtensions.shader_float_controls = isExtensionAvailable(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
info.deviceExtensions.dynamic_rendering = false; // isExtensionAvailable(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
info.deviceExtensions.depth_clip_enable = isExtensionAvailable(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME);
info.deviceExtensions.pipeline_robustness = isExtensionAvailable(VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME);
// dynamic rendering doesn't provide any benefits for us right now. Driver implementations are very unoptimized as of Feb 2022
info.deviceExtensions.present_wait = isExtensionAvailable(VK_KHR_PRESENT_WAIT_EXTENSION_NAME) && isExtensionAvailable(VK_KHR_PRESENT_ID_EXTENSION_NAME);
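
Support detection in the hunks above is two-step: VK_EXT_pipeline_robustness must appear in the device's extension list, and the pipelineRobustness feature bit must be reported as VK_TRUE, otherwise the flag is cleared again. A self-contained sketch of that feature query (hypothetical standalone function; Cemu's actual vkGetPhysicalDeviceFeatures2 call sits outside the hunks shown):

// Sketch: query the pipelineRobustness feature bit on a physical device.
// Assumes the extension name was already found via vkEnumerateDeviceExtensionProperties.
bool QueryPipelineRobustnessFeature(VkPhysicalDevice physicalDevice)
{
	VkPhysicalDevicePipelineRobustnessFeaturesEXT pprf{};
	pprf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;
	VkPhysicalDeviceFeatures2 features2{};
	features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
	features2.pNext = &pprf;
	vkGetPhysicalDeviceFeatures2(physicalDevice, &features2);
	return pprf.pipelineRobustness == VK_TRUE;
}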

View file

@@ -453,6 +453,7 @@ private:
bool shader_float_controls = false; // VK_KHR_shader_float_controls
bool present_wait = false; // VK_KHR_present_wait
bool depth_clip_enable = false; // VK_EXT_depth_clip_enable
bool pipeline_robustness = false; // VK_EXT_pipeline_robustness
}deviceExtensions;
struct

View file

@@ -298,7 +298,8 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
// init pipeline compiler
PipelineCompiler* pipelineCompiler = new PipelineCompiler();
pipelineCompiler->InitFromCurrentGPUState(pipelineInfo, LatteGPUState.contextNew, vkFBO->GetRenderPassObj());
bool requiresRobustBufferAccess = PipelineCompiler::CalcRobustBufferAccessRequirement(vertexShader, pixelShader, geometryShader);
pipelineCompiler->InitFromCurrentGPUState(pipelineInfo, LatteGPUState.contextNew, vkFBO->GetRenderPassObj(), requiresRobustBufferAccess);
pipelineCompiler->TrackAsCached(vsBaseHash, pipelineHash);
// use heuristics based on parameter patterns to determine if the current drawcall is essential (non-skipable)