Review copy: make the descriptor draw-call limit a per-device value.

Summary of the patch
  * The DESCRIPTOR_MAX_DRAW_CALLS macro moves from VKHelpers.h to vkutils/device.h and becomes
    the default of the new physical_device::descriptor_max_draw_calls member.
  * physical_device::get_physical_device_properties() is added. It may disable update-after-bind
    or lower descriptor_max_draw_calls to 8192, depending on
    maxUpdateAfterBindDescriptorsInAllPools.
  * Descriptor pools are sized from render_device::get_descriptor_max_draw_calls(), and callers
    ask descriptor_pool::can_allocate() instead of comparing against the macro.
  * Pool creation keys VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT off
    get_descriptor_update_after_bind_support() instead of get_descriptor_indexing_support().

Reviewer changes relative to the submitted patch
  * descriptor_pool::can_allocate(): the u32 sum "used_count + required_count" could wrap around
    and report free capacity; it is now checked with a subtraction that cannot wrap. The stray
    ';' after the inline body is removed.
  * Explanatory comments were added to the new code. Hunk headers were adjusted for them.

Open review notes
  * VKHelpers.h still defines OCCLUSION_MAX_POOL_SIZE in terms of DESCRIPTOR_MAX_DRAW_CALLS, so it
    must keep seeing vkutils/device.h (directly or transitively). This patch does not show that.
  * descriptor_pool::max_sets() is added but has no caller in this patch.

Reconstruction notes
  * This text was rebuilt from a copy whose line breaks, indentation and template arguments had
    been lost. Tabs, in-line spacing and blank-line placement were inferred from the hunk line
    counts and may not byte-match the target files; apply with whitespace tolerance or
    regenerate the patch from the tree.
  * The first device.h hunk originally began with two #include context lines whose targets could
    not be recovered; its leading context was trimmed instead.
  * The "index" lines are those of the original patch and do not describe the edited post-images.

diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index 6f2b182833..a18b1d3dfc 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -425,22 +425,23 @@ VKGSRender::VKGSRender() : GSRender()
 		m_occlusion_query_manager->set_control_flags(VK_QUERY_CONTROL_PRECISE_BIT, 0);
 	}
 
-	//Generate frame contexts
+	// Generate frame contexts
+	const u32 max_draw_calls = m_device->get_descriptor_max_draw_calls();
 	const auto& binding_table = m_device->get_pipeline_binding_table();
 	const u32 num_fs_samplers = binding_table.vertex_textures_first_bind_slot - binding_table.textures_first_bind_slot;
 
 	std::vector<VkDescriptorPoolSize> sizes;
-	sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS });
-	sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS });
-	sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) * DESCRIPTOR_MAX_DRAW_CALLS });
+	sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * max_draw_calls });
+	sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * max_draw_calls });
+	sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , (num_fs_samplers + 4) * max_draw_calls });
 
 	// Conditional rendering predicate slot; refactor to allow skipping this when not needed
-	sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * DESCRIPTOR_MAX_DRAW_CALLS });
+	sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1 * max_draw_calls });
 
 	VkSemaphoreCreateInfo semaphore_info = {};
 	semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
 
-	//VRAM allocation
+	// VRAM allocation
 	m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000, VK_TRUE);
 	m_fragment_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "fragment env buffer");
 	m_vertex_env_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "vertex env buffer");
@@ -476,7 +477,7 @@ VKGSRender::VKGSRender() : GSRender()
 	{
 		vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_wait_semaphore);
 		vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.acquire_signal_semaphore);
-		ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<u32>(sizes.size()), DESCRIPTOR_MAX_DRAW_CALLS, 1);
+		ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<u32>(sizes.size()), max_draw_calls, 1);
 	}
 
 	const auto& memory_map = m_device->get_memory_mapping();
@@ -1063,8 +1064,7 @@ void VKGSRender::check_descriptors()
 {
 	// Ease resource pressure if the number of draw calls becomes too high or we are running low on memory resources
 	const auto required_descriptors = rsx::method_registers.current_draw_clause.pass_count();
-	ensure(required_descriptors < DESCRIPTOR_MAX_DRAW_CALLS);
-	if ((required_descriptors + m_current_frame->used_descriptors) > DESCRIPTOR_MAX_DRAW_CALLS)
+	if (!m_current_frame->descriptor_pool.can_allocate(required_descriptors, m_current_frame->used_descriptors))
 	{
 		// Should hard sync before resetting descriptors for spec compliance
 		flush_command_queue(true);
@@ -1078,7 +1078,6 @@ VkDescriptorSet VKGSRender::allocate_descriptor_set()
 {
 	if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]]
 	{
-		ensure(m_current_frame->used_descriptors < DESCRIPTOR_MAX_DRAW_CALLS);
 		return m_current_frame->descriptor_pool.allocate(descriptor_layouts, VK_TRUE, m_current_frame->used_descriptors++);
 	}
 	else
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index 58019eaae7..feb0f0fd59 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -16,7 +16,6 @@
 #include "Emu/RSX/Common/TextureUtils.h"
 #include "Emu/RSX/rsx_utils.h"
 
-#define DESCRIPTOR_MAX_DRAW_CALLS 16384
 #define OCCLUSION_MAX_POOL_SIZE DESCRIPTOR_MAX_DRAW_CALLS
 
 #define FRAME_PRESENT_TIMEOUT 10000000ull // 10 seconds
diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp
index c2df537a49..294627b4cb 100644
--- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp
+++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp
@@ -373,13 +373,15 @@ namespace vk
 
 	void shader_interpreter::create_descriptor_pools(const vk::render_device& dev)
 	{
-		std::vector<VkDescriptorPoolSize> sizes;
-		sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * DESCRIPTOR_MAX_DRAW_CALLS });
-		sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * DESCRIPTOR_MAX_DRAW_CALLS });
-		sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 68 * DESCRIPTOR_MAX_DRAW_CALLS });
-		sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 * DESCRIPTOR_MAX_DRAW_CALLS });
+		const auto max_draw_calls = dev.get_descriptor_max_draw_calls();
 
-		m_descriptor_pool.create(dev, sizes.data(), ::size32(sizes), DESCRIPTOR_MAX_DRAW_CALLS, 2);
+		std::vector<VkDescriptorPoolSize> sizes;
+		sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER , 6 * max_draw_calls });
+		sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER , 3 * max_draw_calls });
+		sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER , 68 * max_draw_calls });
+		sizes.push_back({ VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3 * max_draw_calls });
+
+		m_descriptor_pool.create(dev, sizes.data(), ::size32(sizes), max_draw_calls, 2);
 	}
 
 	void shader_interpreter::init(const vk::render_device& dev)
@@ -513,7 +515,7 @@ namespace vk
 
 	VkDescriptorSet shader_interpreter::allocate_descriptor_set()
 	{
-		if (m_used_descriptors == DESCRIPTOR_MAX_DRAW_CALLS)
+		if (!m_descriptor_pool.can_allocate(1u, m_used_descriptors))
 		{
 			m_descriptor_pool.reset(0);
 			m_used_descriptors = 0;
diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp
index 611e2aae0e..76b6950414 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp
@@ -109,7 +109,7 @@ namespace vk
 	{
 		ensure(subpool_count);
 
-		info.flags = dev.get_descriptor_indexing_support() ? VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT : 0;
+		info.flags = dev.get_descriptor_update_after_bind_support() ? VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT : 0;
 		info.maxSets = max_sets;
 		info.poolSizeCount = size_descriptors_count;
 		info.pPoolSizes = sizes;
@@ -139,16 +139,6 @@ namespace vk
 		m_owner = nullptr;
 	}
 
-	bool descriptor_pool::valid() const
-	{
-		return (!m_device_pools.empty());
-	}
-
-	descriptor_pool::operator VkDescriptorPool()
-	{
-		return m_current_pool_handle;
-	}
-
 	void descriptor_pool::reset(VkDescriptorPoolResetFlags flags)
 	{
 		m_descriptor_set_cache.clear();
@@ -194,11 +184,10 @@ namespace vk
 
 		if (use_cache)
 		{
+			ensure(used_count < info.maxSets);
 			const auto alloc_size = std::min(info.maxSets - used_count, max_cache_size);
 
-			ensure(alloc_size);
 			ensure(m_descriptor_set_cache.empty());
-
 			alloc_info.descriptorSetCount = alloc_size;
 			alloc_info.pSetLayouts = m_allocation_request_cache.data();
 
diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h
index c77a423107..8ff4080830 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h
@@ -20,11 +20,16 @@ namespace vk
 		void destroy();
 		void reset(VkDescriptorPoolResetFlags flags);
 
-		bool valid() const;
-		operator VkDescriptorPool();
-
 		VkDescriptorSet allocate(VkDescriptorSetLayout layout, VkBool32 use_cache, u32 used_count);
 
+		operator VkDescriptorPool() { return m_current_pool_handle; }
+		FORCE_INLINE bool valid() const { return (!m_device_pools.empty()); }
+		FORCE_INLINE u32 max_sets() const { return info.maxSets; }
+
+		// True when 'required_count' more sets still fit into the pool while 'used_count' sets are already taken.
+		// Checked by subtraction so that the u32 arithmetic cannot wrap around and report free capacity.
+		FORCE_INLINE bool can_allocate(u32 required_count, u32 used_count) const { return used_count <= info.maxSets && required_count <= (info.maxSets - used_count); }
+
 	private:
 		const vk::render_device* m_owner = nullptr;
 		VkDescriptorPoolCreateInfo info = {};
diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
index e538ef73ce..26cd04dbd1 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
@@ -85,13 +85,72 @@ namespace vk
 		surface_capabilities_2_support = instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
 	}
 
+	// Queries the memory properties and the general properties of the physical device.
+	// If extensions are allowed and the instance exposes VK_KHR_get_physical_device_properties2,
+	// the extended query is used so that the descriptor indexing limits can be read as well.
+	// Depending on maxUpdateAfterBindDescriptorsInAllPools this either clears
+	// descriptor_update_after_bind_mask or lowers descriptor_max_draw_calls to 8192.
+	// NOTE: Reads descriptor_indexing_support; create() invokes get_physical_device_features() before this function.
+	void physical_device::get_physical_device_properties(bool allow_extensions)
+	{
+		vkGetPhysicalDeviceMemoryProperties(dev, &memory_properties);
+
+		if (!allow_extensions)
+		{
+			vkGetPhysicalDeviceProperties(dev, &props);
+			return;
+		}
+
+		supported_extensions instance_extensions(supported_extensions::instance);
+		if (!instance_extensions.is_supported(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME))
+		{
+			vkGetPhysicalDeviceProperties(dev, &props);
+		}
+		else
+		{
+			VkPhysicalDeviceProperties2KHR properties2;
+			properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+			properties2.pNext = nullptr;
+
+			VkPhysicalDeviceDescriptorIndexingPropertiesEXT descriptor_indexing_props{};
+
+			if (descriptor_indexing_support)
+			{
+				// Chain the descriptor indexing properties into the query
+				descriptor_indexing_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT;
+				descriptor_indexing_props.pNext = properties2.pNext;
+				properties2.pNext = &descriptor_indexing_props;
+			}
+
+			auto _vkGetPhysicalDeviceProperties2KHR = reinterpret_cast<PFN_vkGetPhysicalDeviceProperties2KHR>(vkGetInstanceProcAddr(parent, "vkGetPhysicalDeviceProperties2KHR"));
+			ensure(_vkGetPhysicalDeviceProperties2KHR);
+
+			_vkGetPhysicalDeviceProperties2KHR(dev, &properties2);
+			props = properties2.properties;
+
+			if (descriptor_indexing_support)
+			{
+				if (descriptor_indexing_props.maxUpdateAfterBindDescriptorsInAllPools < 800'000)
+				{
+					rsx_log.error("Physical device does not support enough descriptors for deferred updates to work effectively. Deferred updates are disabled.");
+					descriptor_update_after_bind_mask = 0;
+				}
+				else if (descriptor_indexing_props.maxUpdateAfterBindDescriptorsInAllPools < 2'000'000)
+				{
+					rsx_log.warning("Physical device reports a low amount of allowed deferred descriptor updates. Draw call threshold will be lowered accordingly.");
+					descriptor_max_draw_calls = 8192;
+				}
+			}
+		}
+	}
+
 	void physical_device::create(VkInstance context, VkPhysicalDevice pdev, bool allow_extensions)
 	{
 		dev = pdev;
 		parent = context;
 
-		vkGetPhysicalDeviceProperties(pdev, &props);
-		vkGetPhysicalDeviceMemoryProperties(pdev, &memory_properties);
+		get_physical_device_features(allow_extensions);
+		get_physical_device_properties(allow_extensions);
 
 		rsx_log.always()("Found vulkan-compatible GPU: '%s' running on driver %s", get_name(), get_driver_version());
 
@@ -714,6 +773,11 @@ namespace vk
 		return pgpu->descriptor_update_after_bind_mask;
 	}
 
+	u32 render_device::get_descriptor_max_draw_calls() const
+	{
+		return pgpu->descriptor_max_draw_calls;
+	}
+
 	mem_allocator_base* render_device::get_allocator() const
 	{
 		return m_allocator.get();
diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h
index 993ac69d01..fadf71c252 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/device.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/device.h
@@ -11,4 +11,7 @@
 
+// Default value of physical_device::descriptor_max_draw_calls
+#define DESCRIPTOR_MAX_DRAW_CALLS 16384
+
 namespace vk
 {
 	struct gpu_formats_support
@@ -62,10 +65,13 @@ namespace vk
 		bool descriptor_indexing_support = false;
 
 		u64 descriptor_update_after_bind_mask = 0;
+		// Used as the max set count when creating descriptor pools; may be lowered by get_physical_device_properties()
+		u32 descriptor_max_draw_calls = DESCRIPTOR_MAX_DRAW_CALLS;
 
 		friend class render_device;
 	private:
 		void get_physical_device_features(bool allow_extensions);
+		void get_physical_device_properties(bool allow_extensions);
 
 	public:
 
@@ -147,6 +153,7 @@ namespace vk
 
 		bool get_descriptor_indexing_support() const;
 		u64 get_descriptor_update_after_bind_support() const;
+		u32 get_descriptor_max_draw_calls() const;
 
 		VkQueue get_present_queue() const;
 		VkQueue get_graphics_queue() const;