mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-02 13:01:18 +12:00
Vulkan: Use per-pipeline buffer robustness
And if the extension is not supported, then fall back to enabling robust buffer access for all shaders.
This commit is contained in:
parent
081ebead5f
commit
61484598fc
6 changed files with 96 additions and 12 deletions
|
@ -873,7 +873,7 @@ void PipelineCompiler::InitDynamicState(PipelineInfo* pipelineInfo, bool usesBle
|
|||
dynamicState.pDynamicStates = dynamicStates.data();
|
||||
}
|
||||
|
||||
bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj)
|
||||
bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj, bool requireRobustBufferAccess)
|
||||
{
|
||||
VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance();
|
||||
|
||||
|
@ -888,6 +888,7 @@ bool PipelineCompiler::InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const
|
|||
m_vkGeometryShader = pipelineInfo->geometryShaderVk;
|
||||
m_vkrObjPipeline = pipelineInfo->m_vkrObjPipeline;
|
||||
m_renderPassObj = renderPassObj;
|
||||
m_requestRobustBufferAccess = requireRobustBufferAccess;
|
||||
|
||||
// if required generate RECT emulation geometry shader
|
||||
if (!vkRenderer->m_featureControl.deviceExtensions.nv_fill_rectangle && isPrimitiveRect)
|
||||
|
@ -998,6 +999,8 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show
|
|||
if (!forceCompile)
|
||||
pipelineInfo.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT;
|
||||
|
||||
void* prevStruct = nullptr;
|
||||
|
||||
VkPipelineCreationFeedbackCreateInfoEXT creationFeedbackInfo;
|
||||
VkPipelineCreationFeedbackEXT creationFeedback;
|
||||
std::vector<VkPipelineCreationFeedbackEXT> creationStageFeedback(0);
|
||||
|
@ -1015,9 +1018,25 @@ bool PipelineCompiler::Compile(bool forceCompile, bool isRenderThread, bool show
|
|||
creationFeedbackInfo.pPipelineCreationFeedback = &creationFeedback;
|
||||
creationFeedbackInfo.pPipelineStageCreationFeedbacks = creationStageFeedback.data();
|
||||
creationFeedbackInfo.pipelineStageCreationFeedbackCount = pipelineInfo.stageCount;
|
||||
pipelineInfo.pNext = &creationFeedbackInfo;
|
||||
creationFeedbackInfo.pNext = prevStruct;
|
||||
prevStruct = &creationFeedbackInfo;
|
||||
}
|
||||
|
||||
VkPipelineRobustnessCreateInfoEXT pipelineRobustnessCreateInfo{};
|
||||
if (vkRenderer->m_featureControl.deviceExtensions.pipeline_robustness && m_requestRobustBufferAccess)
|
||||
{
|
||||
// per-pipeline handling of robust buffer access, if the extension is not available then we fall back to device feature robustBufferAccess
|
||||
pipelineRobustnessCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_ROBUSTNESS_CREATE_INFO_EXT;
|
||||
pipelineRobustnessCreateInfo.pNext = prevStruct;
|
||||
prevStruct = &pipelineRobustnessCreateInfo;
|
||||
pipelineRobustnessCreateInfo.storageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
|
||||
pipelineRobustnessCreateInfo.uniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
|
||||
pipelineRobustnessCreateInfo.vertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT;
|
||||
pipelineRobustnessCreateInfo.images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT;
|
||||
}
|
||||
|
||||
pipelineInfo.pNext = prevStruct;
|
||||
|
||||
VkPipeline pipeline = VK_NULL_HANDLE;
|
||||
VkResult result;
|
||||
uint8 retryCount = 0;
|
||||
|
@ -1075,3 +1094,31 @@ void PipelineCompiler::TrackAsCached(uint64 baseHash, uint64 pipelineStateHash)
|
|||
return;
|
||||
pipelineCache.AddCurrentStateToCache(baseHash, pipelineStateHash);
|
||||
}
|
||||
|
||||
// calculate whether the pipeline requires robust buffer access
|
||||
// if there is a potential risk for a shader to do out-of-bounds reads or writes we need to enable robust buffer access
|
||||
// this can happen when:
|
||||
// - Streamout is used with too small of a buffer (probably? Could also be some issue with how the streamout array index is calculated -> We can maybe fix this in the future)
|
||||
// - The shader uses dynamic indices for uniform access. This will trigger the uniform mode to be FULL_CBANK
|
||||
bool PipelineCompiler::CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader)
|
||||
{
|
||||
bool requiresRobustBufferAcces = false;
|
||||
if (vertexShader)
|
||||
{
|
||||
cemu_assert_debug(vertexShader->shaderType == LatteConst::ShaderType::Vertex);
|
||||
requiresRobustBufferAcces |= vertexShader->hasStreamoutBufferWrite;
|
||||
requiresRobustBufferAcces |= vertexShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
|
||||
}
|
||||
if (geometryShader)
|
||||
{
|
||||
cemu_assert_debug(geometryShader->shaderType == LatteConst::ShaderType::Geometry);
|
||||
requiresRobustBufferAcces |= geometryShader->hasStreamoutBufferWrite;
|
||||
requiresRobustBufferAcces |= geometryShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
|
||||
}
|
||||
if (pixelShader)
|
||||
{
|
||||
cemu_assert_debug(pixelShader->shaderType == LatteConst::ShaderType::Pixel);
|
||||
requiresRobustBufferAcces |= pixelShader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK;
|
||||
}
|
||||
return requiresRobustBufferAcces;
|
||||
}
|
||||
|
|
|
@ -38,11 +38,14 @@ public:
|
|||
RendererShaderVk* m_vkPixelShader{};
|
||||
RendererShaderVk* m_vkGeometryShader{};
|
||||
|
||||
bool InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj);
|
||||
bool InitFromCurrentGPUState(PipelineInfo* pipelineInfo, const LatteContextRegister& latteRegister, VKRObjectRenderPass* renderPassObj, bool requireRobustBufferAccess);
|
||||
void TrackAsCached(uint64 baseHash, uint64 pipelineStateHash); // stores pipeline to permanent cache if not yet cached. Must be called synchronously from render thread due to dependency on GPU state
|
||||
|
||||
static bool CalcRobustBufferAccessRequirement(LatteDecompilerShader* vertexShader, LatteDecompilerShader* pixelShader, LatteDecompilerShader* geometryShader);
|
||||
|
||||
VkPipelineLayout m_pipelineLayout;
|
||||
VKRObjectRenderPass* m_renderPassObj{};
|
||||
bool m_requestRobustBufferAccess{false};
|
||||
|
||||
/* shader stages */
|
||||
std::vector<VkPipelineShaderStageCreateInfo> shaderStages;
|
||||
|
|
|
@ -277,8 +277,9 @@ void VulkanPipelineStableCache::LoadPipelineFromCache(std::span<uint8> fileData)
|
|||
m_pipelineIsCachedLock.unlock();
|
||||
// compile
|
||||
{
|
||||
PipelineCompiler pp;
|
||||
if (!pp.InitFromCurrentGPUState(pipelineInfo, *lcr, renderPass))
|
||||
PipelineCompiler pipelineCompiler;
|
||||
bool requiresRobustBufferAccess = PipelineCompiler::CalcRobustBufferAccessRequirement(vertexShader, pixelShader, geometryShader);
|
||||
if (!pipelineCompiler.InitFromCurrentGPUState(pipelineInfo, *lcr, renderPass, requiresRobustBufferAccess))
|
||||
{
|
||||
s_spinlockSharedInternal.lock();
|
||||
delete lcr;
|
||||
|
@ -286,8 +287,7 @@ void VulkanPipelineStableCache::LoadPipelineFromCache(std::span<uint8> fileData)
|
|||
s_spinlockSharedInternal.unlock();
|
||||
return;
|
||||
}
|
||||
pp.Compile(true, true, false);
|
||||
// destroy pp early
|
||||
pipelineCompiler.Compile(true, true, false);
|
||||
}
|
||||
// on success, calculate pipeline hash and flag as present in cache
|
||||
uint64 pipelineBaseHash = vertexShader->baseHash;
|
||||
|
|
|
@ -50,7 +50,8 @@ const std::vector<const char*> kOptionalDeviceExtensions =
|
|||
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
|
||||
VK_KHR_PRESENT_WAIT_EXTENSION_NAME,
|
||||
VK_KHR_PRESENT_ID_EXTENSION_NAME,
|
||||
VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME
|
||||
VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME,
|
||||
VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME
|
||||
};
|
||||
|
||||
const std::vector<const char*> kRequiredDeviceExtensions =
|
||||
|
@ -263,6 +264,14 @@ void VulkanRenderer::GetDeviceFeatures()
|
|||
pwf.pNext = prevStruct;
|
||||
prevStruct = &pwf;
|
||||
|
||||
VkPhysicalDevicePipelineRobustnessFeaturesEXT pprf{};
|
||||
if (m_featureControl.deviceExtensions.pipeline_robustness)
|
||||
{
|
||||
pprf.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;
|
||||
pprf.pNext = prevStruct;
|
||||
prevStruct = &pprf;
|
||||
}
|
||||
|
||||
VkPhysicalDeviceFeatures2 physicalDeviceFeatures2{};
|
||||
physicalDeviceFeatures2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
|
||||
physicalDeviceFeatures2.pNext = prevStruct;
|
||||
|
@ -317,6 +326,11 @@ void VulkanRenderer::GetDeviceFeatures()
|
|||
{
|
||||
cemuLog_log(LogType::Force, "VK_EXT_depth_clip_enable not supported");
|
||||
}
|
||||
if (m_featureControl.deviceExtensions.pipeline_robustness)
|
||||
{
|
||||
if ( pprf.pipelineRobustness != VK_TRUE )
|
||||
m_featureControl.deviceExtensions.pipeline_robustness = false;
|
||||
}
|
||||
// get limits
|
||||
m_featureControl.limits.minUniformBufferOffsetAlignment = std::max(prop2.properties.limits.minUniformBufferOffsetAlignment, (VkDeviceSize)4);
|
||||
m_featureControl.limits.nonCoherentAtomSize = std::max(prop2.properties.limits.nonCoherentAtomSize, (VkDeviceSize)4);
|
||||
|
@ -475,11 +489,17 @@ VulkanRenderer::VulkanRenderer()
|
|||
deviceFeatures.occlusionQueryPrecise = VK_TRUE;
|
||||
deviceFeatures.depthClamp = VK_TRUE;
|
||||
deviceFeatures.depthBiasClamp = VK_TRUE;
|
||||
if (m_vendor == GfxVendor::AMD)
|
||||
|
||||
if (m_featureControl.deviceExtensions.pipeline_robustness)
|
||||
{
|
||||
deviceFeatures.robustBufferAccess = VK_TRUE;
|
||||
cemuLog_log(LogType::Force, "Enable robust buffer access");
|
||||
deviceFeatures.robustBufferAccess = VK_FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
cemuLog_log(LogType::Force, "VK_EXT_pipeline_robustness not supported. Falling back to robustBufferAccess");
|
||||
deviceFeatures.robustBufferAccess = VK_TRUE;
|
||||
}
|
||||
|
||||
if (m_featureControl.mode.useTFEmulationViaSSBO)
|
||||
{
|
||||
deviceFeatures.vertexPipelineStoresAndAtomics = true;
|
||||
|
@ -524,6 +544,15 @@ VulkanRenderer::VulkanRenderer()
|
|||
deviceExtensionFeatures = &presentWaitFeature;
|
||||
presentWaitFeature.presentWait = VK_TRUE;
|
||||
}
|
||||
// enable VK_EXT_pipeline_robustness
|
||||
VkPhysicalDevicePipelineRobustnessFeaturesEXT pipelineRobustnessFeature{};
|
||||
if (m_featureControl.deviceExtensions.pipeline_robustness)
|
||||
{
|
||||
pipelineRobustnessFeature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT;
|
||||
pipelineRobustnessFeature.pNext = deviceExtensionFeatures;
|
||||
deviceExtensionFeatures = &pipelineRobustnessFeature;
|
||||
pipelineRobustnessFeature.pipelineRobustness = VK_TRUE;
|
||||
}
|
||||
|
||||
std::vector<const char*> used_extensions;
|
||||
VkDeviceCreateInfo createInfo = CreateDeviceCreateInfo(queueCreateInfos, deviceFeatures, deviceExtensionFeatures, used_extensions);
|
||||
|
@ -1127,6 +1156,8 @@ VkDeviceCreateInfo VulkanRenderer::CreateDeviceCreateInfo(const std::vector<VkDe
|
|||
used_extensions.emplace_back(VK_KHR_PRESENT_ID_EXTENSION_NAME);
|
||||
used_extensions.emplace_back(VK_KHR_PRESENT_WAIT_EXTENSION_NAME);
|
||||
}
|
||||
if (m_featureControl.deviceExtensions.pipeline_robustness)
|
||||
used_extensions.emplace_back(VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME);
|
||||
|
||||
VkDeviceCreateInfo createInfo{};
|
||||
createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
|
@ -1224,6 +1255,7 @@ bool VulkanRenderer::CheckDeviceExtensionSupport(const VkPhysicalDevice device,
|
|||
info.deviceExtensions.shader_float_controls = isExtensionAvailable(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
|
||||
info.deviceExtensions.dynamic_rendering = false; // isExtensionAvailable(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
|
||||
info.deviceExtensions.depth_clip_enable = isExtensionAvailable(VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME);
|
||||
info.deviceExtensions.pipeline_robustness = isExtensionAvailable(VK_EXT_PIPELINE_ROBUSTNESS_EXTENSION_NAME);
|
||||
// dynamic rendering doesn't provide any benefits for us right now. Driver implementations are very unoptimized as of Feb 2022
|
||||
info.deviceExtensions.present_wait = isExtensionAvailable(VK_KHR_PRESENT_WAIT_EXTENSION_NAME) && isExtensionAvailable(VK_KHR_PRESENT_ID_EXTENSION_NAME);
|
||||
|
||||
|
|
|
@ -453,6 +453,7 @@ private:
|
|||
bool shader_float_controls = false; // VK_KHR_shader_float_controls
|
||||
bool present_wait = false; // VK_KHR_present_wait
|
||||
bool depth_clip_enable = false; // VK_EXT_depth_clip_enable
|
||||
bool pipeline_robustness = false; // VK_EXT_pipeline_robustness
|
||||
}deviceExtensions;
|
||||
|
||||
struct
|
||||
|
|
|
@ -298,7 +298,8 @@ PipelineInfo* VulkanRenderer::draw_createGraphicsPipeline(uint32 indexCount)
|
|||
// init pipeline compiler
|
||||
PipelineCompiler* pipelineCompiler = new PipelineCompiler();
|
||||
|
||||
pipelineCompiler->InitFromCurrentGPUState(pipelineInfo, LatteGPUState.contextNew, vkFBO->GetRenderPassObj());
|
||||
bool requiresRobustBufferAccess = PipelineCompiler::CalcRobustBufferAccessRequirement(vertexShader, pixelShader, geometryShader);
|
||||
pipelineCompiler->InitFromCurrentGPUState(pipelineInfo, LatteGPUState.contextNew, vkFBO->GetRenderPassObj(), requiresRobustBufferAccess);
|
||||
pipelineCompiler->TrackAsCached(vsBaseHash, pipelineHash);
|
||||
|
||||
// use heuristics based on parameter patterns to determine if the current drawcall is essential (non-skipable)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue