From 49729086ac3f53b2493781a4bb7bb8020a93c405 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 8 Jun 2025 21:21:51 +0300 Subject: [PATCH 01/30] vk: Move descriptor management to the pipeline layer - Frees up callers from managing descriptors themselves (ewww) - Makes descriptor reuse possible - Opens up the door to techniques like descriptor_buffer by abstracting away management to an implementation detail --- rpcs3/Emu/RSX/VK/VKCompute.cpp | 118 +++----- rpcs3/Emu/RSX/VK/VKCompute.h | 26 +- rpcs3/Emu/RSX/VK/VKDraw.cpp | 38 +-- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 65 ++-- rpcs3/Emu/RSX/VK/VKGSRender.h | 7 - rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp | 3 - rpcs3/Emu/RSX/VK/VKOverlays.cpp | 212 +++++-------- rpcs3/Emu/RSX/VK/VKOverlays.h | 26 +- rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp | 63 ++-- rpcs3/Emu/RSX/VK/VKPipelineCompiler.h | 38 ++- rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 9 +- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 296 ++++++++++++++++--- rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 134 +++++++-- rpcs3/Emu/RSX/VK/VKRenderTargets.h | 2 +- rpcs3/Emu/RSX/VK/VKResolveHelper.h | 74 +++-- rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 46 +-- rpcs3/Emu/RSX/VK/VKShaderInterpreter.h | 10 +- rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp | 46 ++- rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h | 3 +- 19 files changed, 677 insertions(+), 539 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp index 637642d8bf..ae36723b81 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.cpp +++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp @@ -8,64 +8,41 @@ namespace vk { - std::vector> compute_task::get_descriptor_layout() + std::vector compute_task::get_inputs() { - std::vector> result; - result.emplace_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ssbo_count); + std::vector result; + for (unsigned i = 0; i < ssbo_count; ++i) + { + const auto input = glsl::program_input::make + ( + ::glsl::glsl_compute_program, + "ssbo" + std::to_string(i), + glsl::program_input_type::input_type_storage_buffer, + i + ); + 
result.push_back(input); + } + + if (use_push_constants && push_constants_size > 0) + { + const auto input = glsl::program_input::make + ( + ::glsl::glsl_compute_program, + "push_constants", + glsl::program_input_type::input_type_push_constant, + 0, + glsl::push_constant_ref{ .offset = 0, .size = push_constants_size } + ); + result.push_back(input); + } + return result; } - void compute_task::init_descriptors() - { - rsx::simple_array descriptor_pool_sizes; - rsx::simple_array bindings; - - const auto layout = get_descriptor_layout(); - for (const auto &e : layout) - { - descriptor_pool_sizes.push_back({e.first, e.second}); - - for (unsigned n = 0; n < e.second; ++n) - { - bindings.push_back - ({ - u32(bindings.size()), - e.first, - 1, - VK_SHADER_STAGE_COMPUTE_BIT, - nullptr - }); - } - } - - // Reserve descriptor pools - m_descriptor_pool.create(*g_render_device, descriptor_pool_sizes); - m_descriptor_layout = vk::descriptors::create_layout(bindings); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &m_descriptor_layout; - - VkPushConstantRange push_constants{}; - if (use_push_constants) - { - push_constants.size = push_constants_size; - push_constants.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - - layout_info.pushConstantRangeCount = 1; - layout_info.pPushConstantRanges = &push_constants; - } - - CHECK_RESULT(vkCreatePipelineLayout(*g_render_device, &layout_info, nullptr, &m_pipeline_layout)); - } - void compute_task::create() { if (!initialized) { - init_descriptors(); - switch (vk::get_driver_vendor()) { case vk::driver_vendor::unknown: @@ -121,10 +98,6 @@ namespace vk m_program.reset(); m_param_buffer.reset(); - vkDestroyDescriptorSetLayout(*g_render_device, m_descriptor_layout, nullptr); - vkDestroyPipelineLayout(*g_render_device, m_pipeline_layout, nullptr); - m_descriptor_pool.destroy(); - initialized = false; } } @@ -142,26 
+115,23 @@ namespace vk shader_stage.module = handle; shader_stage.pName = "main"; - VkComputePipelineCreateInfo info{}; - info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - info.stage = shader_stage; - info.layout = m_pipeline_layout; - info.basePipelineIndex = -1; - info.basePipelineHandle = VK_NULL_HANDLE; + VkComputePipelineCreateInfo create_info + { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = handle, + .pName = "main" + }, + }; auto compiler = vk::get_pipe_compiler(); - m_program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE); - declare_inputs(); + m_program = compiler->compile(create_info, vk::pipe_compiler::COMPILE_INLINE, {}, get_inputs()); } - ensure(m_used_descriptors < VK_MAX_COMPUTE_TASKS); - - m_descriptor_set = m_descriptor_pool.allocate(m_descriptor_layout, VK_TRUE); - bind_resources(); - - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_program->pipeline); - m_descriptor_set.bind(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline_layout); + m_program->bind(cmd, VK_PIPELINE_BIND_POINT_COMPUTE); } void compute_task::run(const vk::command_buffer& cmd, u32 invocations_x, u32 invocations_y, u32 invocations_z) @@ -273,13 +243,13 @@ namespace vk void cs_shuffle_base::bind_resources() { - m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } void cs_shuffle_base::set_parameters(const vk::command_buffer& cmd, const u32* params, u8 count) { ensure(use_push_constants); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, count * 4, params); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, count * 4, params); } void 
cs_shuffle_base::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_length, u32 data_offset) @@ -319,7 +289,7 @@ namespace vk void cs_interleave_task::bind_resources() { - m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } void cs_interleave_task::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset) @@ -379,8 +349,8 @@ namespace vk void cs_aggregator::bind_resources() { - m_program->bind_buffer({ src->value, 0, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); - m_program->bind_buffer({ dst->value, 0, 4 }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ src->value, 0, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_buffer({ dst->value, 0, 4 }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } void cs_aggregator::run(const vk::command_buffer& cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 4f9a3f2a3a..d4e99d8cf3 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -19,12 +19,6 @@ namespace vk std::unique_ptr m_program; std::unique_ptr m_param_buffer; - vk::descriptor_pool m_descriptor_pool; - descriptor_set m_descriptor_set; - VkDescriptorSetLayout m_descriptor_layout = nullptr; - VkPipelineLayout m_pipeline_layout = nullptr; - u32 m_used_descriptors = 0; - bool initialized = false; bool unroll_loops = true; bool use_push_constants = false; @@ -37,15 +31,11 @@ namespace vk compute_task() = default; virtual ~compute_task() { destroy(); } - virtual std::vector> get_descriptor_layout(); - - void init_descriptors(); - void create(); void destroy(); + virtual std::vector get_inputs(); virtual 
void bind_resources() {} - virtual void declare_inputs() {} void load_program(const vk::command_buffer& cmd); @@ -354,7 +344,7 @@ namespace vk void bind_resources() override { - m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 src_offset, u32 src_length, u32 dst_offset) @@ -455,13 +445,13 @@ namespace vk void bind_resources() override { - m_program->bind_buffer({ src_buffer->value, in_offset, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); - m_program->bind_buffer({ dst_buffer->value, out_offset, block_length }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ src_buffer->value, in_offset, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_buffer({ dst_buffer->value, out_offset, block_length }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } void set_parameters(const vk::command_buffer& cmd) { - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, params.data); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, params.data); } void run(const vk::command_buffer& cmd, const vk::buffer* dst, u32 out_offset, const vk::buffer* src, u32 in_offset, u32 data_length, u32 width, u32 height, u32 depth, u32 mipmaps) override @@ -584,13 +574,13 @@ namespace vk void bind_resources() override { const auto op = static_cast(Op); - m_program->bind_buffer({ src_buffer->value, in_offset, in_block_length }, 0 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); - m_program->bind_buffer({ dst_buffer->value, out_offset, out_block_length }, 1 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ src_buffer->value, in_offset, 
in_block_length }, 0 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_buffer({ dst_buffer->value, out_offset, out_block_length }, 1 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } void set_parameters(const vk::command_buffer& cmd) { - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, &params); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, &params); } void run(const vk::command_buffer& cmd, const RSX_detiler_config& config) diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 3b760f103f..4d1253340d 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -555,8 +555,7 @@ bool VKGSRender::bind_texture_env() { m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); + ::glsl::program_domain::glsl_fragment_program); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { @@ -578,7 +577,6 @@ bool VKGSRender::bind_texture_env() m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, i, ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, true); } } @@ -587,15 +585,13 @@ bool VKGSRender::bind_texture_env() const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); + ::glsl::program_domain::glsl_fragment_program); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, i, ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, true); } } @@ -611,8 +607,7 @@ bool VKGSRender::bind_texture_env() const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + ::glsl::program_domain::glsl_vertex_program); continue; } @@ -635,8 +630,7 @@ bool VKGSRender::bind_texture_env() m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + ::glsl::program_domain::glsl_vertex_program); continue; } @@ -645,8 +639,7 @@ bool VKGSRender::bind_texture_env() m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + ::glsl::program_domain::glsl_vertex_program); } return out_of_memory; @@ -721,7 +714,7 @@ bool VKGSRender::bind_interpreter_texture_env() } } - m_shader_interpreter.update_fragment_textures(texture_env, m_current_frame->descriptor_set); + m_shader_interpreter.update_fragment_textures(texture_env); return out_of_memory; } @@ -850,6 +843,7 @@ void VKGSRender::emit_geometry(u32 sub_index) } else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) { + /* // Need to update descriptors; make a copy for the next draw VkDescriptorSet previous_set = m_current_frame->descriptor_set.value(); m_current_frame->descriptor_set.flush(); @@ -874,6 +868,8 @@ void VKGSRender::emit_geometry(u32 sub_index) m_current_frame->descriptor_set.push(copy_cmds); 
update_descriptors = true; + */ + fmt::throw_exception("Not implemented"); } // Update vertex fetch parameters @@ -882,9 +878,9 @@ void VKGSRender::emit_geometry(u32 sub_index) ensure(m_vertex_layout_storage); if (update_descriptors) { - m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set); - m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); + m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot); + m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1); + m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2); } bool reload_state = (!m_current_draw.subdraw_id++); @@ -908,10 +904,12 @@ void VKGSRender::emit_geometry(u32 sub_index) reload_state = true; }); + // Bind both pipe and descriptors in one go + // FIXME: We only need to rebind the pipeline when reload state is set. Flags? 
+ m_program->bind(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS); + if (reload_state) { - vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); - update_draw_state(); begin_render_pass(); @@ -929,7 +927,6 @@ void VKGSRender::emit_geometry(u32 sub_index) } // Bind the new set of descriptors for use with this draw call - m_current_frame->descriptor_set.bind(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline_layout); m_frame_stats.setup_time += m_profiler.duration(); if (!upload_info.index_info) @@ -1083,9 +1080,6 @@ void VKGSRender::end() return; } - // Allocate descriptor set - m_current_frame->descriptor_set = allocate_descriptor_set(); - // Load program execution environment load_program_env(); m_frame_stats.setup_time += m_profiler.duration(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 107cd7b399..17f42f45e8 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -423,8 +423,8 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) std::vector& gpus = m_instance.enumerate_devices(); - //Actually confirm that the loader found at least one compatible device - //This should not happen unless something is wrong with the driver setup on the target system + // Actually confirm that the loader found at least one compatible device + // This should not happen unless something is wrong with the driver setup on the target system if (gpus.empty()) { //We can't throw in Emulator::Load, so we show error and return @@ -482,20 +482,16 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) swapchain_unavailable = true; } - //create command buffer... + // create command buffer... 
m_command_buffer_pool.create((*m_device), m_device->get_graphics_queue_family()); m_primary_cb_list.create(m_command_buffer_pool, vk::command_buffer::access_type_hint::flush_only); m_current_command_buffer = m_primary_cb_list.get(); m_current_command_buffer->begin(); - //Create secondary command_buffer for parallel operations + // Create secondary command_buffer for parallel operations m_secondary_command_buffer_pool.create((*m_device), m_device->get_graphics_queue_family()); m_secondary_cb_list.create(m_secondary_command_buffer_pool, vk::command_buffer::access_type_hint::all); - //Precalculated stuff - rsx::simple_array binding_layout; - std::tie(m_pipeline_layout, m_descriptor_layouts, binding_layout) = vk::get_common_pipeline_layout(*m_device); - //Occlusion m_occlusion_query_manager = std::make_unique(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE); m_occlusion_map.resize(rsx::reports::occlusion_query_count); @@ -508,11 +504,6 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) m_occlusion_query_manager->set_control_flags(VK_QUERY_CONTROL_PRECISE_BIT, 0); } - // Generate frame contexts - const u32 max_draw_calls = m_device->get_descriptor_max_draw_calls(); - const auto descriptor_type_sizes = vk::get_descriptor_pool_sizes(binding_layout); - m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls); - VkSemaphoreCreateInfo semaphore_info = {}; semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; @@ -852,12 +843,6 @@ VKGSRender::~VKGSRender() m_stencil_mirror_sampler.reset(); - // Pipeline descriptors - m_descriptor_pool.destroy(); - - vkDestroyPipelineLayout(*m_device, m_pipeline_layout, nullptr); - vkDestroyDescriptorSetLayout(*m_device, m_descriptor_layouts, nullptr); - // Queries m_occlusion_query_manager.reset(); m_cond_render_buffer.reset(); @@ -1157,18 +1142,6 @@ void VKGSRender::check_present_status() } } -VkDescriptorSet VKGSRender::allocate_descriptor_set() -{ - if 
(!m_shader_interpreter.is_interpreter(m_program)) [[likely]] - { - return m_descriptor_pool.allocate(m_descriptor_layouts, VK_TRUE); - } - else - { - return m_shader_interpreter.allocate_descriptor_set(); - } -} - void VKGSRender::set_viewport() { const auto [clip_width, clip_height] = rsx::apply_resolution_scale( @@ -1242,7 +1215,7 @@ void VKGSRender::on_init_thread() if (!m_overlay_manager) { m_frame->hide(); - m_shaders_cache->load(nullptr, m_pipeline_layout); + m_shaders_cache->load(nullptr); m_frame->show(); } else @@ -1250,7 +1223,7 @@ void VKGSRender::on_init_thread() rsx::shader_loading_dialog_native dlg(this); // TODO: Handle window resize messages during loading on GPUs without OUT_OF_DATE_KHR support - m_shaders_cache->load(&dlg, m_pipeline_layout); + m_shaders_cache->load(&dlg); } } @@ -1870,7 +1843,7 @@ bool VKGSRender::load_program() vertex_program, fragment_program, m_pipeline_properties, - shadermode != shader_mode::recompiler, true, m_pipeline_layout); + shadermode != shader_mode::recompiler, true); vk::leave_uninterruptible(); @@ -2103,32 +2076,32 @@ void VKGSRender::load_program_env() const auto& binding_table = m_device->get_pipeline_binding_table(); - m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot, m_current_frame->descriptor_set); - m_program->bind_buffer(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(m_raster_env_buffer_info, binding_table.rasterizer_env_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot); + 
m_program->bind_buffer(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot); + m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot); + m_program->bind_uniform(m_raster_env_buffer_info, binding_table.rasterizer_env_bind_slot); if (!m_shader_interpreter.is_interpreter(m_program)) { - m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot); } else { - m_program->bind_buffer(m_vertex_instructions_buffer_info, m_shader_interpreter.get_vertex_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); - m_program->bind_buffer(m_fragment_instructions_buffer_info, m_shader_interpreter.get_fragment_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_buffer(m_vertex_instructions_buffer_info, m_shader_interpreter.get_vertex_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_buffer(m_fragment_instructions_buffer_info, m_shader_interpreter.get_fragment_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } if (vk::emulate_conditional_rendering()) { auto predicate = m_cond_render_buffer ? 
m_cond_render_buffer->value : vk::get_scratch_buffer(*m_current_command_buffer, 4)->value; - m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) { - m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); - m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); } // Clear flags @@ -2215,7 +2188,7 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ vkCmdPushConstants( *m_current_command_buffer, - m_pipeline_layout, + m_program->layout(), VK_SHADER_STAGE_VERTEX_BIT, 0, data_length, diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index e16d8d1afa..61dc496402 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -105,11 +105,6 @@ private: vk::command_buffer_chunk* m_current_command_buffer = nullptr; std::unique_ptr m_host_object_data; - - vk::descriptor_pool m_descriptor_pool; - VkDescriptorSetLayout m_descriptor_layouts = VK_NULL_HANDLE; - VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; - vk::framebuffer_holder* m_draw_fbo = nullptr; sizeu m_swapchain_dims{}; @@ -220,8 +215,6 @@ private: void update_draw_state(); void 
check_present_status(); - VkDescriptorSet allocate_descriptor_set(); - vk::vertex_upload_info upload_vertex_data(); rsx::simple_array m_scratch_mem; diff --git a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp index acd4c42cb2..8f38378f52 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp +++ b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp @@ -178,8 +178,6 @@ namespace vk VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE; VkSemaphore present_wait_semaphore = VK_NULL_HANDLE; - vk::descriptor_set descriptor_set; - rsx::flags32_t flags = 0; u32 present_image = -1; @@ -193,7 +191,6 @@ namespace vk { present_wait_semaphore = other.present_wait_semaphore; acquire_signal_semaphore = other.acquire_signal_semaphore; - descriptor_set.swap(other.descriptor_set); flags = other.flags; heap_snapshot = other.heap_snapshot; } diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index f4e7d092f9..5cd4761983 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ -47,102 +47,38 @@ namespace vk } } - void overlay_pass::init_descriptors() - { - rsx::simple_array descriptor_pool_sizes = {}; - - if (m_num_uniform_buffers) - { - descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_num_uniform_buffers }); - }; - - if (m_num_usable_samplers) - { - descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_num_usable_samplers }); - } - - if (m_num_input_attachments) - { - descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, m_num_input_attachments }); - } - - // Reserve descriptor pools - m_descriptor_pool.create(*m_device, descriptor_pool_sizes); - - const auto num_bindings = m_num_uniform_buffers + m_num_usable_samplers + m_num_input_attachments; - rsx::simple_array bindings(num_bindings); - u32 binding_slot = 0; - - for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding_slot) - { - bindings[binding_slot].descriptorType = 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[binding_slot].descriptorCount = 1; - bindings[binding_slot].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[binding_slot].binding = binding_slot; - bindings[binding_slot].pImmutableSamplers = nullptr; - } - - for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding_slot) - { - bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[binding_slot].descriptorCount = 1; - bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[binding_slot].binding = binding_slot; - bindings[binding_slot].pImmutableSamplers = nullptr; - } - - for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding_slot) - { - bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; - bindings[binding_slot].descriptorCount = 1; - bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[binding_slot].binding = binding_slot; - bindings[binding_slot].pImmutableSamplers = nullptr; - } - - ensure(binding_slot == num_bindings); - m_descriptor_layout = vk::descriptors::create_layout(bindings); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &m_descriptor_layout; - - std::vector push_constants = get_push_constants(); - if (!push_constants.empty()) - { - layout_info.pushConstantRangeCount = u32(push_constants.size()); - layout_info.pPushConstantRanges = push_constants.data(); - } - - CHECK_RESULT(vkCreatePipelineLayout(*m_device, &layout_info, nullptr, &m_pipeline_layout)); - } - std::vector overlay_pass::get_vertex_inputs() { check_heap(); - return{}; + return {}; } std::vector overlay_pass::get_fragment_inputs() { - std::vector fs_inputs; + using namespace vk::glsl; + + std::vector fs_inputs; u32 binding = 0; for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding) { const std::string name = 
std::string("static_data") + (n > 0 ? std::to_string(n) : ""); - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, name }); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_uniform_buffer, 0); + fs_inputs.push_back(input); } for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding) { - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, binding, "fs" + std::to_string(n) }); + const std::string name = "fs" + std::to_string(n); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, binding); + fs_inputs.push_back(input); } for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding) { - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, binding, "sp" + std::to_string(n) }); + const std::string name = "sp" + std::to_string(n); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, binding); + fs_inputs.push_back(input); } return fs_inputs; @@ -208,20 +144,20 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = m_pipeline_layout; + info.layout = VK_NULL_HANDLE; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = render_pass; auto compiler = vk::get_pipe_compiler(); - auto program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, get_vertex_inputs(), get_fragment_inputs()); + auto program = compiler->compile(info, vk::pipe_compiler::COMPILE_INLINE, {}, get_vertex_inputs(), get_fragment_inputs()); auto result = program.get(); m_program_cache[storage_key] = 
std::move(program); return result; } - void overlay_pass::load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src) + vk::glsl::program* overlay_pass::load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src) { vk::glsl::program *program = nullptr; const auto key = get_pipeline_key(pass); @@ -232,8 +168,6 @@ namespace vk else program = build_pipeline(key, pass); - m_descriptor_set = m_descriptor_pool.allocate(m_descriptor_layout); - if (!m_sampler && !src.empty()) { m_sampler = std::make_unique(*m_device, @@ -245,21 +179,22 @@ namespace vk if (m_num_uniform_buffers > 0) { - program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set); + program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0); } for (uint n = 0; n < src.size(); ++n) { VkDescriptorImageInfo info = { m_sampler->value, src[n]->value, src[n]->image()->current_layout }; - program->bind_uniform(info, "fs" + std::to_string(n), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set); + program->bind_uniform(info, "fs" + std::to_string(n), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); } - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, program->pipeline); - m_descriptor_set.bind(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout); + program->bind(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS); VkBuffer buffers = m_vao.heap->value; VkDeviceSize offsets = m_vao_offset; vkCmdBindVertexBuffers(cmd, 0, 1, &buffers, &offsets); + + return program; } void overlay_pass::create(const vk::render_device& dev) @@ -267,8 +202,6 @@ namespace vk if (!initialized) { m_device = &dev; - init_descriptors(); - initialized = true; } } @@ -282,10 +215,6 @@ namespace vk m_program_cache.clear(); m_sampler.reset(); - vkDestroyDescriptorSetLayout(*m_device, m_descriptor_layout, nullptr); - vkDestroyPipelineLayout(*m_device, m_pipeline_layout, nullptr); - m_descriptor_pool.destroy(); - initialized = 
false; } } @@ -303,7 +232,7 @@ namespace vk return vk::get_framebuffer(dev, target->width(), target->height(), m_num_input_attachments > 0, render_pass, { target }); } - void overlay_pass::emit_geometry(vk::command_buffer& cmd) + void overlay_pass::emit_geometry(vk::command_buffer& cmd, glsl::program* /*program*/) { vkCmdDraw(cmd, num_drawable_elements, 1, first_vertex, 0); } @@ -328,11 +257,11 @@ namespace vk // This call clobbers dynamic state cmd.flags |= vk::command_buffer::cb_reload_dynamic_state; - load_program(cmd, render_pass, src); + auto program = load_program(cmd, render_pass, src); set_up_viewport(cmd, viewport.x1, viewport.y1, viewport.width(), viewport.height()); vk::begin_renderpass(cmd, render_pass, fbo->value, { positionu{0u, 0u}, sizeu{fbo->width(), fbo->height()} }); - emit_geometry(cmd); + emit_geometry(cmd, program); } void overlay_pass::run(vk::command_buffer& cmd, const areau& viewport, vk::image* target, const std::vector& src, VkRenderPass render_pass) @@ -550,24 +479,37 @@ namespace vk false, true, desc->get_data(), owner_uid); } - std::vector ui_overlay_renderer::get_push_constants() + std::vector ui_overlay_renderer::get_vertex_inputs() { - return - { - { - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .offset = 0, - .size = 68 - }, - { - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .offset = 68, - .size = 12 - } - }; + auto result = overlay_pass::get_vertex_inputs(); + result.push_back( + glsl::program_input::make( + ::glsl::glsl_vertex_program, + "push_constants", + glsl::input_type_push_constant, + 0, + glsl::push_constant_ref { .size = 68 } + ) + ); + return result; } - void ui_overlay_renderer::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) + std::vector ui_overlay_renderer::get_fragment_inputs() + { + auto result = overlay_pass::get_fragment_inputs(); + result.push_back( + glsl::program_input::make( + ::glsl::glsl_fragment_program, + "push_constants", + glsl::input_type_push_constant, + 0, + 
glsl::push_constant_ref {.offset = 68, .size = 12 } + ) + ); + return result; + } + + void ui_overlay_renderer::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) { // Byte Layout // 00: vec4 ui_scale; @@ -600,7 +542,7 @@ namespace vk .get(); push_buf[16] = std::bit_cast(vert_config); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 68, push_buf); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_VERTEX_BIT, 0, 68, push_buf); // 2. Fragment stuff rsx::overlays::fragment_options frag_opts; @@ -614,7 +556,7 @@ namespace vk push_buf[1] = m_time; push_buf[2] = m_blur_strength; - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 68, 12, push_buf); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 68, 12, push_buf); } void ui_overlay_renderer::set_primitive_type(rsx::overlays::primitive_type type) @@ -641,7 +583,7 @@ namespace vk } } - void ui_overlay_renderer::emit_geometry(vk::command_buffer& cmd) + void ui_overlay_renderer::emit_geometry(vk::command_buffer& cmd, glsl::program* program) { if (m_current_primitive_type == rsx::overlays::primitive_type::quad_list) { @@ -657,7 +599,7 @@ namespace vk } else { - overlay_pass::emit_geometry(cmd); + overlay_pass::emit_geometry(cmd, program); } } @@ -764,17 +706,20 @@ namespace vk renderpass_config.set_attachment_count(1); } - std::vector attachment_clear_pass::get_push_constants() + std::vector attachment_clear_pass::get_vertex_inputs() { - VkPushConstantRange constant; - constant.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - constant.offset = 0; - constant.size = 32; - - return { constant }; + return + { + vk::glsl::program_input::make( + ::glsl::glsl_vertex_program, + "push_constants", + vk::glsl::input_type_push_constant, + 0, + glsl::push_constant_ref{ .size = 32 }) + }; } - void attachment_clear_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) + void 
attachment_clear_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) { f32 data[8]; data[0] = clear_color.r; @@ -786,7 +731,7 @@ namespace vk data[6] = colormask.b; data[7] = colormask.a; - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 32, data); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_VERTEX_BIT, 0, 32, data); } void attachment_clear_pass::set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h) @@ -910,19 +855,24 @@ namespace vk m_num_usable_samplers = 2; } - std::vector video_out_calibration_pass::get_push_constants() + std::vector video_out_calibration_pass::get_fragment_inputs() { - VkPushConstantRange constant; - constant.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - constant.offset = 0; - constant.size = 16; - - return { constant }; + auto result = overlay_pass::get_fragment_inputs(); + result.push_back( + vk::glsl::program_input::make( + ::glsl::glsl_fragment_program, + "push_constants", + vk::glsl::input_type_push_constant, + 0, + glsl::push_constant_ref{ .size = 16 } + ) + ); + return result; } - void video_out_calibration_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) + void video_out_calibration_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) { - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, config.data); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, config.data); } void video_out_calibration_pass::run(vk::command_buffer& cmd, const areau& viewport, vk::framebuffer* target, diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 7308a5c894..a968f706a1 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -44,11 +44,6 @@ namespace vk vk::glsl::shader m_vertex_shader; vk::glsl::shader m_fragment_shader; - vk::descriptor_pool m_descriptor_pool; - descriptor_set m_descriptor_set; - 
VkDescriptorSetLayout m_descriptor_layout = nullptr; - VkPipelineLayout m_pipeline_layout = nullptr; - VkFilter m_sampler_filter = VK_FILTER_LINEAR; u32 m_num_usable_samplers = 1; u32 m_num_input_attachments = 0; @@ -83,8 +78,6 @@ namespace vk void check_heap(); - void init_descriptors(); - virtual void update_uniforms(vk::command_buffer& /*cmd*/, vk::glsl::program* /*program*/) {} virtual std::vector get_vertex_inputs(); @@ -92,11 +85,6 @@ namespace vk virtual void get_dynamic_state_entries(std::vector& /*state_descriptors*/) {} - virtual std::vector get_push_constants() - { - return {}; - } - int sampler_location(int index) const { return 1 + index; } int input_attachment_location(int index) const { return 1 + m_num_usable_samplers + index; } @@ -113,8 +101,7 @@ namespace vk } vk::glsl::program* build_pipeline(u64 storage_key, VkRenderPass render_pass); - - void load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src); + vk::glsl::program* load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src); virtual void create(const vk::render_device& dev); virtual void destroy(); @@ -123,7 +110,7 @@ namespace vk vk::framebuffer* get_framebuffer(vk::image* target, VkRenderPass render_pass); - virtual void emit_geometry(vk::command_buffer& cmd); + virtual void emit_geometry(vk::command_buffer& cmd, glsl::program* program); virtual void set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h); @@ -169,13 +156,14 @@ namespace vk vk::image_view* find_font(rsx::overlays::font* font, vk::command_buffer& cmd, vk::data_heap& upload_heap); vk::image_view* find_temp_image(rsx::overlays::image_info_base* desc, vk::command_buffer& cmd, vk::data_heap& upload_heap, u32 owner_uid); - std::vector get_push_constants() override; + std::vector get_vertex_inputs() override; + std::vector get_fragment_inputs() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override; void 
set_primitive_type(rsx::overlays::primitive_type type); - void emit_geometry(vk::command_buffer& cmd) override; + void emit_geometry(vk::command_buffer& cmd, glsl::program* program) override; void run(vk::command_buffer& cmd, const areau& viewport, vk::framebuffer* target, VkRenderPass render_pass, vk::data_heap& upload_heap, rsx::overlays::overlay& ui); @@ -189,7 +177,7 @@ namespace vk attachment_clear_pass(); - std::vector get_push_constants() override; + std::vector get_vertex_inputs() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override; @@ -227,7 +215,7 @@ namespace vk video_out_calibration_pass(); - std::vector get_push_constants() override; + std::vector get_fragment_inputs() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) override; diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp index 52742e1241..13c16513d3 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp @@ -36,12 +36,12 @@ namespace vk { if (job.is_graphics_job) { - auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.pipe_layout, job.inputs, {}); + auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.inputs, {}); job.callback_func(compiled); } else { - auto compiled = int_compile_compute_pipe(job.compute_data, job.pipe_layout); + auto compiled = int_compile_compute_pipe(job.compute_data, job.inputs); job.callback_func(compiled); } } @@ -50,25 +50,26 @@ namespace vk } } - std::unique_ptr pipe_compiler::int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout) + std::unique_ptr pipe_compiler::int_compile_compute_pipe( + const VkComputePipelineCreateInfo& create_info, + const std::vector& cs_inputs) { - VkPipeline pipeline; - vkCreateComputePipelines(*g_render_device, nullptr, 1, &create_info, nullptr, &pipeline); - return 
std::make_unique(*m_device, pipeline, pipe_layout); + return std::make_unique(*m_device, create_info, cs_inputs); } - std::unique_ptr pipe_compiler::int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout, - const std::vector& vs_inputs, const std::vector& fs_inputs) + std::unique_ptr pipe_compiler::int_compile_graphics_pipe( + const VkGraphicsPipelineCreateInfo& create_info, + const std::vector& vs_inputs, + const std::vector& fs_inputs) { - VkPipeline pipeline; - CHECK_RESULT(vkCreateGraphicsPipelines(*m_device, VK_NULL_HANDLE, 1, &create_info, nullptr, &pipeline)); - auto result = std::make_unique(*m_device, pipeline, pipe_layout, vs_inputs, fs_inputs); - result->link(); - return result; + return std::make_unique(*m_device, create_info, vs_inputs, fs_inputs); } - std::unique_ptr pipe_compiler::int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout, - const std::vector& vs_inputs, const std::vector& fs_inputs) + std::unique_ptr pipe_compiler::int_compile_graphics_pipe( + const vk::pipeline_props &create_info, + VkShaderModule modules[2], + const std::vector& vs_inputs, + const std::vector& fs_inputs) { VkPipelineShaderStageCreateInfo shader_stages[2] = {}; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -157,52 +158,54 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = pipe_layout; + info.layout = VK_NULL_HANDLE; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = vk::get_renderpass(*m_device, create_info.renderpass_key); - return int_compile_graphics_pipe(info, pipe_layout, vs_inputs, fs_inputs); + return int_compile_graphics_pipe(info, vs_inputs, fs_inputs); } std::unique_ptr pipe_compiler::compile( const VkComputePipelineCreateInfo& create_info, - VkPipelineLayout pipe_layout, - op_flags flags, callback_t 
callback) + op_flags flags, callback_t callback, + const std::vector& cs_inputs) { if (flags == COMPILE_INLINE) { - return int_compile_compute_pipe(create_info, pipe_layout); + return int_compile_compute_pipe(create_info, cs_inputs); } - m_work_queue.push(create_info, pipe_layout, callback); + m_work_queue.push(create_info, cs_inputs, callback); return {}; } std::unique_ptr pipe_compiler::compile( const VkGraphicsPipelineCreateInfo& create_info, - VkPipelineLayout pipe_layout, op_flags flags, callback_t /*callback*/, - const std::vector& vs_inputs, const std::vector& fs_inputs) + const std::vector& vs_inputs, + const std::vector& fs_inputs) { // It is very inefficient to defer this as all pointers need to be saved ensure(flags == COMPILE_INLINE); - return int_compile_graphics_pipe(create_info, pipe_layout, vs_inputs, fs_inputs); + return int_compile_graphics_pipe(create_info, vs_inputs, fs_inputs); } std::unique_ptr pipe_compiler::compile( - const vk::pipeline_props& create_info, - VkShaderModule module_handles[2], - VkPipelineLayout pipe_layout, + const vk::pipeline_props &create_info, + VkShaderModule vs, + VkShaderModule fs, op_flags flags, callback_t callback, - const std::vector& vs_inputs, const std::vector& fs_inputs) + const std::vector& vs_inputs, + const std::vector& fs_inputs) { + VkShaderModule modules[] = { vs, fs }; if (flags == COMPILE_INLINE) { - return int_compile_graphics_pipe(create_info, module_handles, pipe_layout, vs_inputs, fs_inputs); + return int_compile_graphics_pipe(create_info, modules, vs_inputs, fs_inputs); } - m_work_queue.push(create_info, pipe_layout, module_handles, vs_inputs, fs_inputs, callback); + m_work_queue.push(create_info, modules, vs_inputs, fs_inputs, callback); return {}; } diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h index 836bc5f14f..a915595e62 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h @@ -68,21 +68,20 @@ namespace vk 
void initialize(const vk::render_device* pdev); std::unique_ptr compile( - const VkComputePipelineCreateInfo& create_info, - VkPipelineLayout pipe_layout, - op_flags flags, callback_t callback = {}); + const VkComputePipelineCreateInfo& cs, + op_flags flags, callback_t callback = {}, + const std::vector& cs_inputs = {}); std::unique_ptr compile( const VkGraphicsPipelineCreateInfo& create_info, - VkPipelineLayout pipe_layout, op_flags flags, callback_t callback = {}, const std::vector& vs_inputs = {}, const std::vector& fs_inputs = {}); std::unique_ptr compile( const vk::pipeline_props &create_info, - VkShaderModule module_handles[2], - VkPipelineLayout pipe_layout, + VkShaderModule vs, + VkShaderModule fs, op_flags flags, callback_t callback = {}, const std::vector& vs_inputs = {}, const std::vector& fs_inputs = {}); @@ -112,13 +111,11 @@ namespace vk vk::pipeline_props graphics_data; compute_pipeline_props compute_data; - VkPipelineLayout pipe_layout; VkShaderModule graphics_modules[2]; std::vector inputs; pipe_compiler_job( const vk::pipeline_props& props, - VkPipelineLayout layout, VkShaderModule modules[2], const std::vector& vs_in, const std::vector& fs_in, @@ -126,7 +123,6 @@ namespace vk { callback_func = func; graphics_data = props; - pipe_layout = layout; graphics_modules[0] = modules[0]; graphics_modules[1] = modules[1]; is_graphics_job = true; @@ -138,24 +134,34 @@ namespace vk pipe_compiler_job( const VkComputePipelineCreateInfo& props, - VkPipelineLayout layout, + const std::vector& cs_in, callback_t func) { callback_func = func; compute_data = props; - pipe_layout = layout; is_graphics_job = false; + + inputs = cs_in; } }; const vk::render_device* m_device = nullptr; lf_queue m_work_queue; - std::unique_ptr int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout); - std::unique_ptr int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout, - const std::vector& 
vs_inputs, const std::vector& fs_inputs); - std::unique_ptr int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout, - const std::vector& vs_inputs, const std::vector& fs_inputs); + std::unique_ptr int_compile_compute_pipe( + const VkComputePipelineCreateInfo& create_info, + const std::vector& cs_inputs); + + std::unique_ptr int_compile_graphics_pipe( + const VkGraphicsPipelineCreateInfo& create_info, + const std::vector& vs_inputs, + const std::vector& fs_inputs); + + std::unique_ptr int_compile_graphics_pipe( + const vk::pipeline_props &create_info, + VkShaderModule modules[2], + const std::vector& vs_inputs, + const std::vector& fs_inputs); }; void initialize_pipe_compiler(int num_worker_threads = -1); diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 4f9f535a76..647b21adc4 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -46,15 +46,14 @@ namespace vk const fragment_program_type& fragmentProgramData, const vk::pipeline_props& pipelineProperties, bool compile_async, - std::function callback, - VkPipelineLayout common_pipeline_layout) + std::function callback) { const auto compiler_flags = compile_async ? 
vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE; - VkShaderModule modules[2] = { vertexProgramData.handle, fragmentProgramData.handle }; - auto compiler = vk::get_pipe_compiler(); auto result = compiler->compile( - pipelineProperties, modules, common_pipeline_layout, + pipelineProperties, + vertexProgramData.handle, + fragmentProgramData.handle, compiler_flags, callback, vertexProgramData.uniforms, fragmentProgramData.uniforms); diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 57174caa98..34bb4d1331 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "VKProgramPipeline.h" +#include "VKResourceManager.h" #include "vkutils/descriptors.h" #include "vkutils/device.h" @@ -7,10 +8,61 @@ namespace vk { + extern vk::render_device* get_current_renderer(); + namespace glsl { using namespace ::glsl; + VkDescriptorType to_descriptor_type(program_input_type type) + { + switch (type) + { + case input_type_uniform_buffer: + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + case input_type_texel_buffer: + return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + case input_type_texture: + return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + case input_type_storage_buffer: + return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + case input_type_storage_texture: + return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + default: + fmt::throw_exception("Unexpected program input type %d", static_cast(type)); + } + } + + VkShaderStageFlags to_shader_stage_flags(::glsl::program_domain domain) + { + switch (domain) + { + case glsl_vertex_program: + return VK_SHADER_STAGE_VERTEX_BIT; + case glsl_fragment_program: + return VK_SHADER_STAGE_FRAGMENT_BIT; + case glsl_compute_program: + return VK_SHADER_STAGE_COMPUTE_BIT; + default: + fmt::throw_exception("Unexpected domain %d", static_cast(domain)); + } + } + + const char* to_string(::glsl::program_domain domain) + { + 
switch (domain) + { + case glsl_vertex_program: + return "vertex"; + case glsl_fragment_program: + return "fragment"; + case glsl_compute_program: + return "compute"; + default: + fmt::throw_exception("Unexpected domain %d", static_cast(domain)); + } + } + void shader::create(::glsl::program_domain domain, const std::string& source) { type = domain; @@ -23,11 +75,8 @@ namespace vk if (!spirv::compile_glsl_to_spv(m_compiled, m_source, type, ::glsl::glsl_rules_vulkan)) { - const std::string shader_type = type == ::glsl::program_domain::glsl_vertex_program ? "vertex" : - type == ::glsl::program_domain::glsl_fragment_program ? "fragment" : "compute"; - rsx_log.notice("%s", m_source); - fmt::throw_exception("Failed to compile %s shader", shader_type); + fmt::throw_exception("Failed to compile %s shader", to_string(type)); } VkShaderModuleCreateInfo vs_info; @@ -69,34 +118,56 @@ namespace vk return m_handle; } - void program::create_impl() + void program::init() { linked = false; - attribute_location_mask = 0; - vertex_attributes_mask = 0; fs_texture_bindings.fill(~0u); fs_texture_mirror_bindings.fill(~0u); vs_texture_bindings.fill(~0u); } - program::program(VkDevice dev, VkPipeline p, VkPipelineLayout layout, const std::vector &vertex_input, const std::vector& fragment_inputs) - : m_device(dev), pipeline(p), pipeline_layout(layout) + program::program(VkDevice dev, const VkGraphicsPipelineCreateInfo& create_info, const std::vector &vertex_inputs, const std::vector& fragment_inputs) + : m_device(dev) { - create_impl(); - load_uniforms(vertex_input); + init(); + + load_uniforms(vertex_inputs); load_uniforms(fragment_inputs); + + create_pipeline_layout(); + ensure(m_pipeline_layout); + + auto _create_info = create_info; + _create_info.layout = m_pipeline_layout; + CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &create_info, nullptr, &m_pipeline)); } - program::program(VkDevice dev, VkPipeline p, VkPipelineLayout layout) - : m_device(dev), pipeline(p), 
pipeline_layout(layout) + program::program(VkDevice dev, const VkComputePipelineCreateInfo& create_info, const std::vector& compute_inputs) + : m_device(dev) { - create_impl(); + init(); + + load_uniforms(compute_inputs); + + create_pipeline_layout(); + ensure(m_pipeline_layout); + + auto _create_info = create_info; + _create_info.layout = m_pipeline_layout; + CHECK_RESULT(vkCreateComputePipelines(dev, nullptr, 1, &create_info, nullptr, &m_pipeline)); } program::~program() { - vkDestroyPipeline(m_device, pipeline, nullptr); + vkDestroyPipeline(m_device, m_pipeline, nullptr); + + if (m_pipeline_layout) + { + vkDestroyPipelineLayout(m_device, m_pipeline_layout, nullptr); + vkDestroyDescriptorSetLayout(m_device, m_descriptor_set_layout, nullptr); + vk::get_resource_manager()->dispose(m_descriptor_pool); + } } program& program::load_uniforms(const std::vector& inputs) @@ -160,14 +231,36 @@ namespace vk }); } - void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorType type, vk::descriptor_set &set) + u32 program::get_uniform_location(program_input_type type, const std::string& uniform_name) + { + const auto& uniform = uniforms[type]; + const auto result = std::find_if(uniform.cbegin(), uniform.cend(), [&uniform_name](const auto& u) + { + return u.name == uniform_name; + }); + + if (result == uniform.end()) + { + return { umax }; + } + + return result->location; + } + + void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorType type) { for (const auto &uniform : uniforms[program_input_type::input_type_texture]) { if (uniform.name == uniform_name) { - set.push(image_descriptor, type, uniform.location); - attribute_location_mask |= (1ull << uniform.location); + if (m_descriptor_slots[uniform.location].matches(image_descriptor)) + { + return; + } + + next_descriptor_set(); + m_descriptor_set.push(image_descriptor, type, uniform.location); + 
m_descriptors_dirty[uniform.location] = false; return; } } @@ -175,7 +268,7 @@ namespace vk rsx_log.notice("texture not found in program: %s", uniform_name.c_str()); } - void program::bind_uniform(const VkDescriptorImageInfo & image_descriptor, int texture_unit, ::glsl::program_domain domain, vk::descriptor_set &set, bool is_stencil_mirror) + void program::bind_uniform(const VkDescriptorImageInfo & image_descriptor, int texture_unit, ::glsl::program_domain domain, bool is_stencil_mirror) { ensure(domain != ::glsl::program_domain::glsl_compute_program); @@ -189,34 +282,46 @@ namespace vk binding = vs_texture_bindings[texture_unit]; } - if (binding != ~0u) + if (binding == ~0u) [[ unlikely ]] { - set.push(image_descriptor, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, binding); - attribute_location_mask |= (1ull << binding); + rsx_log.notice("texture not found in program: %stex%u", (domain == ::glsl::program_domain::glsl_vertex_program) ? "v" : "", texture_unit); return; } - rsx_log.notice("texture not found in program: %stex%u", (domain == ::glsl::program_domain::glsl_vertex_program)? 
"v" : "", texture_unit); + if (m_descriptor_slots[binding].matches(image_descriptor)) + { + return; + } + + next_descriptor_set(); + m_descriptor_set.push(image_descriptor, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, binding); + m_descriptors_dirty[binding] = false; } - void program::bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, vk::descriptor_set &set) + void program::bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point) { - bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, set); + bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); } - void program::bind_uniform(const VkBufferView &buffer_view, u32 binding_point, vk::descriptor_set &set) + void program::bind_uniform(const VkBufferView &buffer_view, u32 binding_point) { - set.push(buffer_view, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, binding_point); - attribute_location_mask |= (1ull << binding_point); + if (m_descriptor_slots[binding_point].matches(buffer_view)) + { + return; + } + + next_descriptor_set(); + m_descriptor_set.push(buffer_view, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, binding_point); + m_descriptors_dirty[binding_point] = false; } - void program::bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, vk::descriptor_set &set) + void program::bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name) { for (const auto &uniform : uniforms[type]) { if (uniform.name == binding_name) { - bind_uniform(buffer_view, uniform.location, set); + bind_uniform(buffer_view, uniform.location); return; } } @@ -224,10 +329,135 @@ namespace vk rsx_log.notice("vertex buffer not found in program: %s", binding_name.c_str()); } - void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type, vk::descriptor_set &set) + void program::bind_buffer(const 
VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type) { - set.push(buffer_descriptor, type, binding_point); - attribute_location_mask |= (1ull << binding_point); + m_descriptor_set.push(buffer_descriptor, type, binding_point); + m_descriptors_dirty[binding_point] = false; + } + + VkDescriptorSet program::allocate_descriptor_set() + { + if (!m_descriptor_pool) + { + create_descriptor_pool(); + } + + return m_descriptor_pool->allocate(m_descriptor_set_layout); + } + + void program::next_descriptor_set() + { + const auto new_set = allocate_descriptor_set(); + const auto old_set = m_descriptor_set.value(); + + if (old_set) + { + m_copy_cmds.clear(); + for (unsigned i = 0; i < m_copy_cmds.size(); ++i) + { + if (!m_descriptors_dirty[i]) + { + continue; + } + + // Reuse already initialized memory. Each command is the same anyway. + m_copy_cmds.resize(m_copy_cmds.size() + 1); + auto& cmd = m_copy_cmds.back(); + cmd.srcBinding = cmd.dstBinding = i; + cmd.srcSet = old_set; + cmd.dstSet = new_set; + } + + m_descriptor_set.push(m_copy_cmds); + } + + m_descriptor_set = allocate_descriptor_set(); + } + + program& program::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point) + { + VkDescriptorSet set = m_descriptor_set.value(); + vkCmdBindPipeline(cmd, bind_point, m_pipeline); + vkCmdBindDescriptorSets(cmd, bind_point, m_pipeline_layout, 0, 1, &set, 0, nullptr); + return *this; + } + + void program::create_descriptor_set_layout() + { + ensure(m_descriptor_set_layout == VK_NULL_HANDLE); + + rsx::simple_array bindings; + bindings.reserve(16); + + m_descriptor_pool_sizes.clear(); + m_descriptor_pool_sizes.reserve(input_type_max_enum); + + for (const auto& type_arr : uniforms) + { + if (type_arr.empty() || type_arr.front().type == input_type_push_constant) + { + continue; + } + + VkDescriptorType type = to_descriptor_type(type_arr.front().type); + m_descriptor_pool_sizes.push_back({ .type = type }); + + for (const auto& input : type_arr) 
+ { + VkDescriptorSetLayoutBinding binding + { + .binding = input.location, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = to_shader_stage_flags(input.domain) + }; + bindings.push_back(binding); + m_descriptor_pool_sizes.back().descriptorCount++; + } + } + + VkDescriptorSetLayoutCreateInfo set_layout_create_info + { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .flags = 0, + .bindingCount = ::size32(bindings), + .pBindings = bindings.data() + }; + CHECK_RESULT(vkCreateDescriptorSetLayout(m_device, &set_layout_create_info, nullptr, &m_descriptor_set_layout)); + } + + void program::create_pipeline_layout() + { + ensure(!linked); + ensure(m_pipeline_layout == VK_NULL_HANDLE); + + create_descriptor_set_layout(); + + rsx::simple_array push_constants{}; + for (const auto& input : uniforms[input_type_push_constant]) + { + const auto& range = input.as_push_constant(); + push_constants.push_back({ .offset = range.offset, .size = range.size }); + } + + VkPipelineLayoutCreateInfo create_info + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = &m_descriptor_set_layout, + .pushConstantRangeCount = ::size32(push_constants), + .pPushConstantRanges = push_constants.data() + }; + CHECK_RESULT(vkCreatePipelineLayout(m_device, &create_info, nullptr, &m_pipeline_layout)); + } + + void program::create_descriptor_pool() + { + ensure(linked); + + m_descriptor_pool = std::make_unique(); + m_descriptor_pool->create(*vk::get_current_renderer(), m_descriptor_pool_sizes); } } } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index 06dbaf877f..0b3e8ed7d1 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -7,6 +7,7 @@ #include #include +#include namespace vk { @@ -15,18 +16,20 @@ namespace vk enum program_input_type : u32 { input_type_uniform_buffer = 0, - input_type_texel_buffer = 1, - input_type_texture = 
2, - input_type_storage_buffer = 3, + input_type_texel_buffer, + input_type_texture, + input_type_storage_buffer, + input_type_storage_texture, + input_type_push_constant, - input_type_max_enum = 4 + input_type_max_enum }; struct bound_sampler { - VkFormat format; - VkImage image; - VkComponentMapping mapping; + VkFormat format = VK_FORMAT_UNDEFINED; + VkImage image = VK_NULL_HANDLE; + VkComponentMapping mapping{}; }; struct bound_buffer @@ -37,16 +40,73 @@ namespace vk u64 size = 0; }; + struct push_constant_ref + { + u32 offset = 0; + u32 size = 0; + }; + struct program_input { ::glsl::program_domain domain; program_input_type type; - bound_buffer as_buffer; - bound_sampler as_sampler; + using bound_data_t = std::variant; + bound_data_t bound_data; u32 location; std::string name; + + inline bound_buffer& as_buffer() { return *std::get_if(&bound_data); } + inline bound_sampler& as_sampler() { return *std::get_if(&bound_data); } + inline push_constant_ref& as_push_constant() { return *std::get_if(&bound_data); } + + inline const bound_buffer& as_buffer() const { return *std::get_if(&bound_data); } + inline const bound_sampler& as_sampler() const { return *std::get_if(&bound_data); } + inline const push_constant_ref& as_push_constant() const { return *std::get_if(&bound_data); } + + static program_input make( + ::glsl::program_domain domain, + const std::string& name, + program_input_type type, + u32 location, + const bound_data_t& data = bound_buffer{}) + { + return program_input + { + .domain = domain, + .type = type, + .bound_data = data, + .location = location, + .name = name + }; + } + }; + + union descriptor_slot_t + { + VkDescriptorImageInfo image_info; + VkDescriptorBufferInfo buffer_info; + VkBufferView buffer_view; + + bool matches(const VkDescriptorImageInfo& test) const + { + return test.imageView == image_info.imageView && + test.sampler == image_info.sampler && + test.imageLayout == image_info.imageLayout; + } + + bool matches(const 
VkDescriptorBufferInfo& test) const + { + return test.buffer == buffer_info.buffer && + test.offset == buffer_info.offset && + test.range == buffer_info.range; + } + + bool matches(VkBufferView test) const + { + return test == buffer_view; + } }; class shader @@ -75,37 +135,61 @@ namespace vk class program { std::array, input_type_max_enum> uniforms; - VkDevice m_device; + VkDevice m_device = VK_NULL_HANDLE; + + VkPipeline m_pipeline = VK_NULL_HANDLE; + VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; std::array fs_texture_bindings; std::array fs_texture_mirror_bindings; std::array vs_texture_bindings; - bool linked; + bool linked = false; - void create_impl(); + std::unique_ptr m_descriptor_pool; + VkDescriptorSetLayout m_descriptor_set_layout = VK_NULL_HANDLE; + vk::descriptor_set m_descriptor_set{}; + rsx::simple_array m_descriptor_pool_sizes; + + std::vector m_descriptor_slots; + std::vector m_descriptors_dirty; + rsx::simple_array m_copy_cmds; + + void init(); + + void create_descriptor_set_layout(); + void create_pipeline_layout(); + void create_descriptor_pool(); + + VkDescriptorSet allocate_descriptor_set(); + void next_descriptor_set(); + + program& load_uniforms(const std::vector& inputs); public: - VkPipeline pipeline; - VkPipelineLayout pipeline_layout; - u64 attribute_location_mask; - u64 vertex_attributes_mask; - program(VkDevice dev, VkPipeline p, VkPipelineLayout layout, const std::vector &vertex_input, const std::vector& fragment_inputs); - program(VkDevice dev, VkPipeline p, VkPipelineLayout layout); + program(VkDevice dev, const VkGraphicsPipelineCreateInfo& create_info, const std::vector &vertex_inputs, const std::vector& fragment_inputs); + program(VkDevice dev, const VkComputePipelineCreateInfo& create_info, const std::vector& compute_inputs); program(const program&) = delete; program(program&& other) = delete; ~program(); - program& load_uniforms(const std::vector& inputs); program& link(); + program& bind(const vk::command_buffer& cmd, 
VkPipelineBindPoint bind_point); bool has_uniform(program_input_type type, const std::string &uniform_name); - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorType type, vk::descriptor_set &set); - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, int texture_unit, ::glsl::program_domain domain, vk::descriptor_set &set, bool is_stencil_mirror = false); - void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, vk::descriptor_set &set); - void bind_uniform(const VkBufferView &buffer_view, u32 binding_point, vk::descriptor_set &set); - void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, vk::descriptor_set &set); - void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type, vk::descriptor_set &set); + u32 get_uniform_location(program_input_type type, const std::string& uniform_name); + + void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorType type); + void bind_uniform(const VkDescriptorImageInfo &image_descriptor, int texture_unit, ::glsl::program_domain domain, bool is_stencil_mirror = false); + void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point); + void bind_uniform(const VkBufferView &buffer_view, u32 binding_point); + void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name); + void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type); + + void bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 binding_point); + + inline VkPipelineLayout layout() const { return m_pipeline_layout; } + inline VkPipeline value() const { return m_pipeline; } }; } } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h 
b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index caa85dcc84..3c3ef0acbd 100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -154,7 +154,7 @@ namespace vk // If we have driver support for FBO loops, set the usage flag for it. if (vk::get_current_renderer()->get_framebuffer_loops_support()) { - return { VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, 0 }; + return { VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT }; } // Workarounds to force transition to GENERAL to decompress. diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h index 7cf6631b67..2403f5bc59 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.h +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -23,43 +23,36 @@ namespace vk void build(const std::string& format_prefix, bool unresolve, bool bgra_swap); - std::vector> get_descriptor_layout() override - { - return - { - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2 } - }; - } - - void declare_inputs() override + std::vector get_inputs() override { std::vector inputs = { - { + glsl::program_input::make( ::glsl::program_domain::glsl_compute_program, - vk::glsl::program_input_type::input_type_texture, - {}, {}, - 0, - "multisampled" - }, - { + "multisampled", + glsl::input_type_storage_texture, + 0 + ), + + glsl::program_input::make( ::glsl::program_domain::glsl_compute_program, - vk::glsl::program_input_type::input_type_texture, - {}, {}, - 1, - "resolve" - } + "resolve", + glsl::input_type_storage_texture, + 1 + ), }; - m_program->load_uniforms(inputs); + auto result = compute_task::get_inputs(); + result.insert(result.end(), inputs.begin(), inputs.end()); + return result; } void bind_resources() override { auto msaa_view = multisampled->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_VIEW_MULTISAMPLED)); auto resolved_view = resolve->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_IDENTITY)); - m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, 
multisampled->current_layout }, "multisampled", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); - m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, "resolve", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); + m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, "multisampled", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); + m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, "resolve", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); } void run(const vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image) @@ -116,19 +109,22 @@ namespace vk void build(bool resolve_depth, bool resolve_stencil, bool unresolve); - std::vector get_push_constants() override + std::vector get_fragment_inputs() override { - VkPushConstantRange constant; - constant.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - constant.offset = 0; - constant.size = 16; - - return { constant }; + auto result = overlay_pass::get_fragment_inputs(); + result.push_back(glsl::program_input::make( + ::glsl::glsl_fragment_program, + "push_constants", + glsl::input_type_push_constant, + umax, + glsl::push_constant_ref{ .size = 16 } + )); + return result; } - void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) override + void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override { - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, static_parameters_width * 4, static_parameters); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 0, static_parameters_width * 4, static_parameters); } void update_sample_configuration(vk::image* msaa_image) @@ -226,16 +222,16 @@ namespace vk state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); } - void emit_geometry(vk::command_buffer& cmd) override + void emit_geometry(vk::command_buffer& cmd, glsl::program* program) override { 
vkCmdClearAttachments(cmd, 1, &clear_info, 1, &region); for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1) { vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, write_mask); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); - overlay_pass::emit_geometry(cmd); + overlay_pass::emit_geometry(cmd, program); } } @@ -285,16 +281,16 @@ namespace vk state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); } - void emit_geometry(vk::command_buffer& cmd) override + void emit_geometry(vk::command_buffer& cmd, glsl::program* program) override { vkCmdClearAttachments(cmd, 1, &clear_info, 1, &clear_region); for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1) { vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, write_mask); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); - overlay_pass::emit_geometry(cmd); + overlay_pass::emit_geometry(cmd, program); } } diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index 3c9188fd60..da10965be9 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -254,7 +254,7 @@ namespace vk m_shader_cache[compiler_options].m_fs = std::move(fs); return ret; } - +/* std::pair shader_interpreter::create_layout(VkDevice dev) { const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); @@ -356,24 +356,16 @@ namespace vk CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); return { set_layout, result }; } - - void shader_interpreter::create_descriptor_pools(const vk::render_device& dev) - { - const auto max_draw_calls = dev.get_descriptor_max_draw_calls(); - m_descriptor_pool.create(dev, 
m_descriptor_pool_sizes, max_draw_calls); - } +*/ void shader_interpreter::init(const vk::render_device& dev) { m_device = dev; - std::tie(m_shared_descriptor_layout, m_shared_pipeline_layout) = create_layout(dev); - create_descriptor_pools(dev); } void shader_interpreter::destroy() { m_program_cache.clear(); - m_descriptor_pool.destroy(); for (auto &fs : m_shader_cache) { @@ -382,18 +374,6 @@ namespace vk } m_shader_cache.clear(); - - if (m_shared_pipeline_layout) - { - vkDestroyPipelineLayout(m_device, m_shared_pipeline_layout, nullptr); - m_shared_pipeline_layout = VK_NULL_HANDLE; - } - - if (m_shared_descriptor_layout) - { - vkDestroyDescriptorSetLayout(m_device, m_shared_descriptor_layout, nullptr); - m_shared_descriptor_layout = VK_NULL_HANDLE; - } } glsl::program* shader_interpreter::link(const vk::pipeline_props& properties, u64 compiler_opt) @@ -478,28 +458,30 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = m_shared_pipeline_layout; + info.layout = VK_NULL_HANDLE; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = vk::get_renderpass(m_device, properties.renderpass_key); auto compiler = vk::get_pipe_compiler(); - auto program = compiler->compile(info, m_shared_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, m_vs_inputs, m_fs_inputs); + auto program = compiler->compile(info, vk::pipe_compiler::COMPILE_INLINE, {}, m_vs_inputs, m_fs_inputs); return program.release(); } - void shader_interpreter::update_fragment_textures(const std::array& sampled_images, vk::descriptor_set &set) + void shader_interpreter::update_fragment_textures(const std::array& sampled_images) { - const VkDescriptorImageInfo* texture_ptr = sampled_images.data(); - for (u32 i = 0, binding = m_fragment_textures_start; i < 4; ++i, ++binding, texture_ptr += 16) + // FIXME: Cannot use m_fragment_textures.start now since each interpreter has its own binding layout + u32 
binding = m_current_interpreter->get_uniform_location(glsl::input_type_texture, "texture1D_array"); + if (binding == umax) { - set.push(texture_ptr, 16, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, binding); + return; } - } - VkDescriptorSet shader_interpreter::allocate_descriptor_set() - { - return m_descriptor_pool.allocate(m_shared_descriptor_layout); + const VkDescriptorImageInfo* texture_ptr = sampled_images.data(); + for (u32 i = 0; i < 4; ++i, ++binding, texture_ptr += 16) + { + m_current_interpreter->bind_uniform_array(texture_ptr, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 16, binding); + } } glsl::program* shader_interpreter::get( diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h index d359ca343e..aeaad698fb 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h @@ -16,8 +16,6 @@ namespace vk std::vector m_fs_inputs; VkDevice m_device = VK_NULL_HANDLE; - VkDescriptorSetLayout m_shared_descriptor_layout = VK_NULL_HANDLE; - VkPipelineLayout m_shared_pipeline_layout = VK_NULL_HANDLE; glsl::program* m_current_interpreter = nullptr; struct pipeline_key @@ -47,8 +45,6 @@ namespace vk std::unordered_map, key_hasher> m_program_cache; std::unordered_map m_shader_cache; - rsx::simple_array m_descriptor_pool_sizes; - vk::descriptor_pool m_descriptor_pool; u32 m_vertex_instruction_start = 0; u32 m_fragment_instruction_start = 0; @@ -56,9 +52,6 @@ namespace vk pipeline_key m_current_key{}; - std::pair create_layout(VkDevice dev); - void create_descriptor_pools(const vk::render_device& dev); - glsl::shader* build_vs(u64 compiler_opt); glsl::shader* build_fs(u64 compiler_opt); glsl::program* link(const vk::pipeline_props& properties, u64 compiler_opt); @@ -78,7 +71,6 @@ namespace vk u32 get_vertex_instruction_location() const; u32 get_fragment_instruction_location() const; - void update_fragment_textures(const std::array& sampled_images, vk::descriptor_set &set); - VkDescriptorSet 
allocate_descriptor_set(); + void update_fragment_textures(const std::array& sampled_images); }; } diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp index c256070490..d0b972765c 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp @@ -68,36 +68,28 @@ namespace vk create(); } - std::vector> fsr_pass::get_descriptor_layout() - { - return - { - { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1 }, - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1 } - }; - } - - void fsr_pass::declare_inputs() + std::vector fsr_pass::get_inputs() { std::vector inputs = { - { + glsl::program_input::make( ::glsl::program_domain::glsl_compute_program, - vk::glsl::program_input_type::input_type_texture, - {}, {}, - 0, - "InputTexture" - }, - { + "InputTexture", + vk::glsl::input_type_texture, + 0 + ), + + glsl::program_input::make( ::glsl::program_domain::glsl_compute_program, - vk::glsl::program_input_type::input_type_texture, - {}, {}, - 1, - "OutputTexture" - } + "OutputTexture", + vk::glsl::input_type_storage_texture, + 1 + ), }; - m_program->load_uniforms(inputs); + auto result = compute_task::get_inputs(); + result.insert(result.end(), inputs.begin(), inputs.end()); + return result; } void fsr_pass::bind_resources() @@ -111,8 +103,8 @@ namespace vk VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK); } - m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, "InputTexture", VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set); - m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, m_output_image->image()->current_layout }, "OutputTexture", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); + m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, "InputTexture", 
VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, m_output_image->image()->current_layout }, "OutputTexture", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); } void fsr_pass::run(const vk::command_buffer& cmd, vk::viewable_image* src, vk::viewable_image* dst, const size2u& input_size, const size2u& output_size) @@ -158,7 +150,7 @@ namespace vk static_cast(src_image->width()), static_cast(src_image->height()), // Size of the raw image to upscale (in case viewport does not cover it all) static_cast(m_output_size.width), static_cast(m_output_size.height)); // Size of output viewport (target size) - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); } rcas_pass::rcas_pass() @@ -177,7 +169,7 @@ namespace vk auto cas_attenuation = 2.f - (g_cfg.video.vk.rcas_sharpening_intensity / 50.f); FsrRcasCon(&m_constants_buf[0], cas_attenuation); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); } } // Namespace FidelityFX diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h b/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h index c5b5b30e73..6d9b15d72a 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h @@ -19,8 +19,7 @@ namespace vk size2u m_output_size; u32 m_constants_buf[20]; - std::vector> get_descriptor_layout() override; - void declare_inputs() override; + std::vector get_inputs() override; void bind_resources() override; virtual void configure(const vk::command_buffer& cmd) = 0; From 4d493bbb800f38ad57aaac37f289e376e5ccbd2d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 8 Jun 2025 21:34:57 +0300 Subject: [PATCH 02/30] vk: Fix build --- 
rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 35 +++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 34bb4d1331..2090b6dd26 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -8,7 +8,7 @@ namespace vk { - extern vk::render_device* get_current_renderer(); + extern const vk::render_device* get_current_renderer(); namespace glsl { @@ -331,10 +331,43 @@ namespace vk void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type) { + if (m_descriptor_slots[binding_point].matches(buffer_descriptor)) + { + return; + } + + next_descriptor_set(); m_descriptor_set.push(buffer_descriptor, type, binding_point); m_descriptors_dirty[binding_point] = false; } + void program::bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 binding_point) + { + // FIXME: Unoptimized... + bool match = true; + for (int i = 0; i < count; ++i) + { + if (!m_descriptor_slots[binding_point + i].matches(image_descriptors[i])) + { + match = false; + break; + } + } + + if (match) + { + return; + } + + next_descriptor_set(); + m_descriptor_set.push(image_descriptors, static_cast(count), type, binding_point); + + for (int i = 0; i < count; ++i) + { + m_descriptors_dirty[binding_point] = false; + } + } + VkDescriptorSet program::allocate_descriptor_set() { if (!m_descriptor_pool) From 356b2f591060f29d0453f4c92a6619fc0e983e83 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 12 Jun 2025 03:56:30 +0300 Subject: [PATCH 03/30] vk: Rewrite program binding management to use "separate shader objects" concept. 
--- rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp | 30 ++ rpcs3/Emu/RSX/VK/VKCommonDecompiler.h | 2 + rpcs3/Emu/RSX/VK/VKCompute.cpp | 10 +- rpcs3/Emu/RSX/VK/VKCompute.h | 12 +- rpcs3/Emu/RSX/VK/VKDraw.cpp | 39 +- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 152 ++++-- rpcs3/Emu/RSX/VK/VKFragmentProgram.h | 17 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 29 +- rpcs3/Emu/RSX/VK/VKOverlays.cpp | 15 +- rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp | 14 +- rpcs3/Emu/RSX/VK/VKPipelineCompiler.h | 7 +- rpcs3/Emu/RSX/VK/VKProgramHelper.hpp | 12 + rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 531 +++++++++++-------- rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 105 ++-- rpcs3/Emu/RSX/VK/VKResolveHelper.h | 7 +- rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 4 +- rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 139 +++-- rpcs3/Emu/RSX/VK/VKVertexProgram.h | 16 +- rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp | 6 +- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 5 - rpcs3/Emu/RSX/VK/vkutils/descriptors.h | 4 +- 21 files changed, 743 insertions(+), 413 deletions(-) create mode 100644 rpcs3/Emu/RSX/VK/VKProgramHelper.hpp diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 01e5cc07aa..1191561625 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -35,4 +35,34 @@ namespace vk fmt::throw_exception("Unknown register name: %s", varying_register_name); } + + int get_texture_index(std::string_view name) + { + if (name.length() < 2) + { + fmt::throw_exception("Invalid texture name: '%s'", name); + } + +#define IS_DIGIT(x) (x >= '0' && x <= '9') + + constexpr int max_index_length = 2; + std::string index; + + for (int char_idx = name.length() - max_index_length; char_idx < name.length(); ++char_idx) + { + if (IS_DIGIT(name[char_idx])) + { + index += name[char_idx]; + } + } + +#undef IS_DIGIT + + if (index.empty()) + { + fmt::throw_exception("Invalid texture name: '%s'", name); + } + + return std::atoi(index.c_str()); + } } diff --git 
a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h index b0920e27f5..b17eb83b11 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h @@ -6,4 +6,6 @@ namespace vk using namespace ::glsl; int get_varying_register_location(std::string_view varying_register_name); + + int get_texture_index(std::string_view name); } diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp index ae36723b81..c164edddd7 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.cpp +++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp @@ -18,6 +18,7 @@ namespace vk ::glsl::glsl_compute_program, "ssbo" + std::to_string(i), glsl::program_input_type::input_type_storage_buffer, + 0, i ); result.push_back(input); @@ -31,6 +32,7 @@ namespace vk "push_constants", glsl::program_input_type::input_type_push_constant, 0, + 0, glsl::push_constant_ref{ .offset = 0, .size = push_constants_size } ); result.push_back(input); @@ -243,7 +245,7 @@ namespace vk void cs_shuffle_base::bind_resources() { - m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform({ m_data->value, m_data_offset, m_data_length }, 0, 0); } void cs_shuffle_base::set_parameters(const vk::command_buffer& cmd, const u32* params, u8 count) @@ -289,7 +291,7 @@ namespace vk void cs_interleave_task::bind_resources() { - m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform({ m_data->value, m_data_offset, m_ssbo_length }, 0, 0); } void cs_interleave_task::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset) @@ -349,8 +351,8 @@ namespace vk void cs_aggregator::bind_resources() { - m_program->bind_buffer({ src->value, 0, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - m_program->bind_buffer({ dst->value, 0, 4 }, 1, 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform({ src->value, 0, block_length }, 0, 0); + m_program->bind_uniform({ dst->value, 0, 4 }, 0, 1); } void cs_aggregator::run(const vk::command_buffer& cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index d4e99d8cf3..fa053afe50 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -344,7 +344,7 @@ namespace vk void bind_resources() override { - m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform({ m_data->value, m_data_offset, m_ssbo_length }, 0, 0); } void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 src_offset, u32 src_length, u32 dst_offset) @@ -445,8 +445,8 @@ namespace vk void bind_resources() override { - m_program->bind_buffer({ src_buffer->value, in_offset, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - m_program->bind_buffer({ dst_buffer->value, out_offset, block_length }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform({ src_buffer->value, in_offset, block_length }, 0, 0); + m_program->bind_uniform({ dst_buffer->value, out_offset, block_length }, 0, 1); } void set_parameters(const vk::command_buffer& cmd) @@ -573,9 +573,9 @@ namespace vk void bind_resources() override { - const auto op = static_cast(Op); - m_program->bind_buffer({ src_buffer->value, in_offset, in_block_length }, 0 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - m_program->bind_buffer({ dst_buffer->value, out_offset, out_block_length }, 1 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + const auto op = static_cast(Op); + m_program->bind_uniform({ src_buffer->value, in_offset, in_block_length }, 0u, 0u ^ op); + m_program->bind_uniform({ dst_buffer->value, out_offset, out_block_length }, 0u, 1u ^ op); } void set_parameters(const vk::command_buffer& cmd) diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp 
b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 4d1253340d..1e96087694 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -554,8 +554,8 @@ bool VKGSRender::bind_texture_env() if (view) [[likely]] { m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program); + vk::glsl::binding_set_index_fragment, + m_fragment_prog->binding_table.ftex_location[i]); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { @@ -575,24 +575,22 @@ bool VKGSRender::bind_texture_env() } m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program, - true); + vk::glsl::binding_set_index_fragment, + m_fragment_prog->binding_table.ftex_stencil_location[i]); } } else { const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program); + vk::glsl::binding_set_index_fragment, + m_fragment_prog->binding_table.ftex_location[i]); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program, - true); + vk::glsl::binding_set_index_fragment, + m_fragment_prog->binding_table.ftex_stencil_location[i]); } } } @@ -606,8 +604,8 @@ bool VKGSRender::bind_texture_env() { const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program); + vk::glsl::binding_set_index_vertex, + m_vertex_prog->binding_table.vtex_location[i]); continue; } @@ -629,8 +627,8 @@ bool VKGSRender::bind_texture_env() const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program); + vk::glsl::binding_set_index_vertex, + m_vertex_prog->binding_table.vtex_location[i]); continue; } @@ -638,8 +636,8 @@ bool VKGSRender::bind_texture_env() validate_image_layout_for_read_access(*m_current_command_buffer, image_ptr, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, sampler_state); m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, - i, - ::glsl::program_domain::glsl_vertex_program); + vk::glsl::binding_set_index_vertex, + m_vertex_prog->binding_table.vtex_location[i]); } return out_of_memory; @@ -820,8 +818,6 @@ void VKGSRender::emit_geometry(u32 sub_index) auto volatile_buffer = m_volatile_attribute_storage ? 
m_volatile_attribute_storage->value : null_buffer_view->value; bool update_descriptors = false; - const auto& binding_table = m_device->get_pipeline_binding_table(); - if (m_current_draw.subdraw_id == 0) { update_descriptors = true; @@ -878,9 +874,10 @@ void VKGSRender::emit_geometry(u32 sub_index) ensure(m_vertex_layout_storage); if (update_descriptors) { - m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot); - m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1); - m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2); + const auto& binding_table = m_vertex_prog->binding_table; + m_program->bind_uniform(persistent_buffer, vk::glsl::binding_set_index_vertex, binding_table.vertex_buffers_location); + m_program->bind_uniform(volatile_buffer, vk::glsl::binding_set_index_vertex, binding_table.vertex_buffers_location + 1); + m_program->bind_uniform(m_vertex_layout_storage->value, vk::glsl::binding_set_index_vertex, binding_table.vertex_buffers_location + 2); } bool reload_state = (!m_current_draw.subdraw_id++); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index dd654a6736..25f4297dee 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -26,8 +26,85 @@ std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::st return glsl::compareFunctionImpl(f, Op0, Op1); } +void VKFragmentDecompilerThread::prepareBindingTable() +{ + // First check if we have constants and textures as those need extra work + bool has_constants = false, has_textures = false; + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (has_constants && has_textures) + { + break; + } + + if (PT.type.starts_with("sampler")) + { + has_textures = true; + continue; + } + + ensure(PT.type.starts_with("vec")); + has_constants = true; + } + + unsigned location = 
0; // All bindings must be set from this var + vk_prog->binding_table.context_buffer_location = location++; + if (has_constants) + { + vk_prog->binding_table.cbuf_location = location++; + } + + vk_prog->binding_table.tex_param_location = location++; + vk_prog->binding_table.polygon_stipple_params_location = location++; + + if (has_textures) [[ likely ]] + { + unsigned num_textures = 0; + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (!PT.type.starts_with("sampler")) + { + continue; + } + + for (const ParamItem& PI : PT.items) + { + num_textures++; + + const auto texture_id = vk::get_texture_index(PI.name); + const auto mask = 1u << texture_id; + + // Allocate real binding + vk_prog->binding_table.ftex_location[texture_id] = location++; + + // Tag the stencil mirror if required + if (properties.redirected_sampler_mask & mask) [[ unlikely ]] + { + vk_prog->binding_table.ftex_stencil_location[texture_id] = 0; + } + } + + // Normalize stencil offsets + if (properties.redirected_sampler_mask != 0) [[ unlikely ]] + { + for (auto& stencil_location : vk_prog->binding_table.ftex_stencil_location) + { + if (stencil_location == umax) + { + continue; + } + + stencil_location = location++; + } + } + } + } +} + void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) { + prepareBindingTable(); + std::vector required_extensions; if (device_props.has_native_half_support) @@ -97,21 +174,18 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) { - u32 location = m_binding_table.textures_first_bind_slot; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (PT.type != "sampler1D" && - PT.type != "sampler2D" && - PT.type != "sampler3D" && - PT.type != "samplerCube") + if (PT.type.starts_with("sampler1D")) + { continue; + } for (const ParamItem& PI : PT.items) { std::string samplerType = PT.type; - ensure(PI.name.length() > 3); - int index = 
atoi(&PI.name[3]); + const int index = vk::get_texture_index(PI.name); const auto mask = (1 << index); if (properties.multisampled_sampler_mask & mask) @@ -135,39 +209,37 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) } } - vk::glsl::program_input in; - in.location = location; - in.domain = glsl::glsl_fragment_program; - in.name = PI.name; - in.type = vk::glsl::input_type_texture; - + const int id = vk::get_texture_index(PI.name); + auto in = vk::glsl::program_input::make( + glsl::glsl_fragment_program, + PI.name, + vk::glsl::input_type_texture, + vk::glsl::binding_set_index_fragment, + vk_prog->binding_table.ftex_location[id] + ); inputs.push_back(in); - OS << "layout(set=0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set=0, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; if (properties.redirected_sampler_mask & mask) { // Insert stencil mirror declaration in.name += "_stencil"; - in.location = location; - + in.location = vk_prog->binding_table.ftex_stencil_location[id]; inputs.push_back(in); - OS << "layout(set=0, binding=" << location++ << ") uniform u" << samplerType << " " << in.name << ";\n"; + OS << "layout(set=0, binding=" << in.location << ") uniform u" << samplerType << " " << in.name << ";\n"; } } } - ensure(location <= m_binding_table.vertex_textures_first_bind_slot); // "Too many sampler descriptors!" 
- std::string constants_block; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (PT.type == "sampler1D" || - PT.type == "sampler2D" || - PT.type == "sampler3D" || - PT.type == "samplerCube") + if (PT.type.starts_with("sampler1D")) + { continue; + } for (const ParamItem& PI : PT.items) { @@ -177,13 +249,13 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) if (!constants_block.empty()) { - OS << "layout(std140, set = 0, binding = 2) uniform FragmentConstantsBuffer\n"; + OS << "layout(std140, set = 1, binding = " << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n"; OS << "{\n"; OS << constants_block; OS << "};\n\n"; } - OS << "layout(std140, set = 0, binding = 3) uniform FragmentStateBuffer\n"; + OS << "layout(std140, set = 1, binding = " << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n"; OS << "{\n"; OS << " float fog_param0;\n"; OS << " float fog_param1;\n"; @@ -195,32 +267,39 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << " float wpos_bias;\n"; OS << "};\n\n"; - OS << "layout(std140, set = 0, binding = 4) uniform TextureParametersBuffer\n"; + OS << "layout(std140, set = 1, binding = " << vk_prog->binding_table.tex_param_location << ") uniform TextureParametersBuffer\n"; OS << "{\n"; OS << " sampler_info texture_parameters[16];\n"; OS << "};\n\n"; - OS << "layout(std140, set = 0, binding = " << std::to_string(m_binding_table.rasterizer_env_bind_slot) << ") uniform RasterizerHeap\n"; + OS << "layout(std140, set = 1, binding = " << vk_prog->binding_table.polygon_stipple_params_location << ") uniform RasterizerHeap\n"; OS << "{\n"; OS << " uvec4 stipple_pattern[8];\n"; OS << "};\n\n"; - vk::glsl::program_input in; - in.location = m_binding_table.fragment_constant_buffers_bind_slot; - in.domain = glsl::glsl_fragment_program; - in.name = "FragmentConstantsBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - 
inputs.push_back(in); + vk::glsl::program_input in + { + .domain = glsl::glsl_fragment_program, + .type = vk::glsl::input_type_uniform_buffer, + .set = vk::glsl::binding_set_index_fragment + }; - in.location = m_binding_table.fragment_state_bind_slot; + if (!constants_block.empty()) + { + in.location = vk_prog->binding_table.cbuf_location; + in.name = "FragmentConstantsBuffer"; + inputs.push_back(in); + } + + in.location = vk_prog->binding_table.context_buffer_location; in.name = "FragmentStateBuffer"; inputs.push_back(in); - in.location = m_binding_table.fragment_texture_params_bind_slot; + in.location = vk_prog->binding_table.tex_param_location; in.name = "TextureParametersBuffer"; inputs.push_back(in); - in.location = m_binding_table.rasterizer_env_bind_slot; + in.location = vk_prog->binding_table.polygon_stipple_params_location; in.name = "RasterizerHeap"; inputs.push_back(in); } @@ -372,7 +451,6 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) void VKFragmentDecompilerThread::Task() { - m_binding_table = vk::g_render_device->get_pipeline_binding_table(); m_shader = Decompile(); vk_prog->SetInputs(inputs); } diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index 787f38ec05..049455a866 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -19,7 +19,8 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler std::vector inputs; class VKFragmentProgram *vk_prog; glsl::shader_properties m_shader_props{}; - vk::pipeline_binding_table m_binding_table{}; + + void prepareBindingTable(); public: VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst) @@ -32,6 +33,7 @@ public: void Task(); const std::vector& get_inputs() { return inputs; } + protected: std::string getFloatTypeName(usz elementCount) override; std::string getHalfTypeName(usz elementCount) override; @@ 
-63,8 +65,19 @@ public: std::vector FragmentConstantOffsetCache; std::array output_color_masks{ {} }; - std::vector uniforms; + + struct + { + u32 context_buffer_location = umax; // Rasterizer context + u32 cbuf_location = umax; // Constants register file + u32 tex_param_location = umax; // Texture configuration data + u32 polygon_stipple_params_location = umax; // Polygon stipple settings + u32 ftex_location[16]; // Texture locations array + u32 ftex_stencil_location[16]; // Texture stencil mirror array + + } binding_table; + void SetInputs(std::vector& inputs); /** * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 17f42f45e8..d64551f7e9 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2074,34 +2074,35 @@ void VKGSRender::load_program_env() } } - const auto& binding_table = m_device->get_pipeline_binding_table(); + const auto& vs_binding_table = m_vertex_prog->binding_table; + const auto& fs_binding_table = m_fragment_prog->binding_table; - m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot); - m_program->bind_buffer(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot); - m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot); - m_program->bind_uniform(m_raster_env_buffer_info, binding_table.rasterizer_env_bind_slot); + m_program->bind_uniform(m_vertex_env_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.context_buffer_location); + m_program->bind_uniform(m_vertex_constants_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.cbuf_location); + m_program->bind_uniform(m_fragment_env_buffer_info, 
vk::glsl::binding_set_index_fragment, fs_binding_table.context_buffer_location); + m_program->bind_uniform(m_fragment_texture_params_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.tex_param_location); + m_program->bind_uniform(m_raster_env_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.polygon_stipple_params_location); - if (!m_shader_interpreter.is_interpreter(m_program)) + if (m_shader_interpreter.is_interpreter(m_program)) { - m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot); + m_program->bind_uniform(m_vertex_instructions_buffer_info, vk::glsl::binding_set_index_vertex, m_shader_interpreter.get_vertex_instruction_location()); + m_program->bind_uniform(m_fragment_instructions_buffer_info, vk::glsl::binding_set_index_fragment, m_shader_interpreter.get_fragment_instruction_location()); } - else + else if (fs_binding_table.cbuf_location != umax) { - m_program->bind_buffer(m_vertex_instructions_buffer_info, m_shader_interpreter.get_vertex_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - m_program->bind_buffer(m_fragment_instructions_buffer_info, m_shader_interpreter.get_fragment_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform(m_fragment_constants_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.cbuf_location); } if (vk::emulate_conditional_rendering()) { auto predicate = m_cond_render_buffer ? 
m_cond_render_buffer->value : vk::get_scratch_buffer(*m_current_command_buffer, 4)->value; - m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform({ predicate, 0, 4 }, vk::glsl::binding_set_index_vertex, vs_binding_table.cr_pred_buffer_location); } if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) { - m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); - m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); + m_program->bind_uniform(m_instancing_indirection_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.instanced_lut_buffer_location); + m_program->bind_uniform(m_instancing_constants_array_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.instanced_cbuf_location); } // Clear flags diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index 5cd4761983..6a74f8e646 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ -63,21 +63,21 @@ namespace vk for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding) { const std::string name = std::string("static_data") + (n > 0 ? 
std::to_string(n) : ""); - const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_uniform_buffer, 0); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_uniform_buffer, 0, 0); fs_inputs.push_back(input); } for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding) { const std::string name = "fs" + std::to_string(n); - const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, binding); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, 0, binding); fs_inputs.push_back(input); } for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding) { const std::string name = "sp" + std::to_string(n); - const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, binding); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, 0, binding); fs_inputs.push_back(input); } @@ -179,13 +179,14 @@ namespace vk if (m_num_uniform_buffers > 0) { - program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0); + program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, 0); } for (uint n = 0; n < src.size(); ++n) { VkDescriptorImageInfo info = { m_sampler->value, src[n]->value, src[n]->image()->current_layout }; - program->bind_uniform(info, "fs" + std::to_string(n), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); + const auto [set, location] = program->get_uniform_location(::glsl::glsl_fragment_program, glsl::input_type_texture, "fs" + std::to_string(n)); + program->bind_uniform(info, set, location); } program->bind(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS); @@ -488,6 +489,7 @@ namespace vk 
"push_constants", glsl::input_type_push_constant, 0, + 0, glsl::push_constant_ref { .size = 68 } ) ); @@ -503,6 +505,7 @@ namespace vk "push_constants", glsl::input_type_push_constant, 0, + 0, glsl::push_constant_ref {.offset = 68, .size = 12 } ) ); @@ -715,6 +718,7 @@ namespace vk "push_constants", vk::glsl::input_type_push_constant, 0, + 0, glsl::push_constant_ref{ .size = 32 }) }; } @@ -864,6 +868,7 @@ namespace vk "push_constants", vk::glsl::input_type_push_constant, 0, + 0, glsl::push_constant_ref{ .size = 16 } ) ); diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp index 13c16513d3..884841ec4b 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp @@ -54,7 +54,9 @@ namespace vk const VkComputePipelineCreateInfo& create_info, const std::vector& cs_inputs) { - return std::make_unique(*m_device, create_info, cs_inputs); + auto program = std::make_unique(*m_device, create_info, cs_inputs); + program->link(false); + return program; } std::unique_ptr pipe_compiler::int_compile_graphics_pipe( @@ -62,7 +64,9 @@ namespace vk const std::vector& vs_inputs, const std::vector& fs_inputs) { - return std::make_unique(*m_device, create_info, vs_inputs, fs_inputs); + auto program = std::make_unique(*m_device, create_info, vs_inputs, fs_inputs); + program->link(true); + return program; } std::unique_ptr pipe_compiler::int_compile_graphics_pipe( @@ -171,7 +175,7 @@ namespace vk op_flags flags, callback_t callback, const std::vector& cs_inputs) { - if (flags == COMPILE_INLINE) + if (flags & COMPILE_INLINE) { return int_compile_compute_pipe(create_info, cs_inputs); } @@ -187,7 +191,7 @@ namespace vk const std::vector& fs_inputs) { // It is very inefficient to defer this as all pointers need to be saved - ensure(flags == COMPILE_INLINE); + ensure(flags & COMPILE_INLINE); return int_compile_graphics_pipe(create_info, vs_inputs, fs_inputs); } @@ -200,7 +204,7 @@ namespace vk const std::vector& 
fs_inputs) { VkShaderModule modules[] = { vs, fs }; - if (flags == COMPILE_INLINE) + if (flags & COMPILE_INLINE) { return int_compile_graphics_pipe(create_info, modules, vs_inputs, fs_inputs); } diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h index a915595e62..25c0b8e1c0 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h @@ -53,13 +53,16 @@ namespace vk class pipe_compiler { public: - enum op_flags + enum op_flag_bits { COMPILE_DEFAULT = 0, COMPILE_INLINE = 1, - COMPILE_DEFERRED = 2 + COMPILE_DEFERRED = 2, + SEPARATE_SHADER_OBJECTS = 4 }; + using op_flags = rsx::flags32_t; + using callback_t = std::function&)>; pipe_compiler(); diff --git a/rpcs3/Emu/RSX/VK/VKProgramHelper.hpp b/rpcs3/Emu/RSX/VK/VKProgramHelper.hpp new file mode 100644 index 0000000000..328df80f1d --- /dev/null +++ b/rpcs3/Emu/RSX/VK/VKProgramHelper.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include "VKProgramPipeline.h" + +namespace vk +{ + namespace glsl + { + + } +} + diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 2090b6dd26..5ac4bd9c26 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -14,6 +14,30 @@ namespace vk { using namespace ::glsl; + bool operator == (const descriptor_slot_t& a, const VkDescriptorImageInfo& b) + { + const auto ptr = std::get_if(&a); + return !!ptr && + ptr->imageView == b.imageView && + ptr->sampler == b.sampler && + ptr->imageLayout == b.imageLayout; + } + + bool operator == (const descriptor_slot_t& a, const VkDescriptorBufferInfo& b) + { + const auto ptr = std::get_if(&a); + return !!ptr && + ptr->buffer == b.buffer && + ptr->offset == b.offset && + ptr->range == b.range; + } + + bool operator == (const descriptor_slot_t& a, const VkBufferView& b) + { + const auto ptr = std::get_if(&a); + return !!ptr && *ptr == b; + } + VkDescriptorType to_descriptor_type(program_input_type type) { 
switch (type) @@ -120,42 +144,24 @@ namespace vk void program::init() { - linked = false; - - fs_texture_bindings.fill(~0u); - fs_texture_mirror_bindings.fill(~0u); - vs_texture_bindings.fill(~0u); + m_linked = false; } program::program(VkDevice dev, const VkGraphicsPipelineCreateInfo& create_info, const std::vector &vertex_inputs, const std::vector& fragment_inputs) - : m_device(dev) + : m_device(dev), m_info(create_info) { init(); load_uniforms(vertex_inputs); load_uniforms(fragment_inputs); - - create_pipeline_layout(); - ensure(m_pipeline_layout); - - auto _create_info = create_info; - _create_info.layout = m_pipeline_layout; - CHECK_RESULT(vkCreateGraphicsPipelines(dev, nullptr, 1, &create_info, nullptr, &m_pipeline)); } program::program(VkDevice dev, const VkComputePipelineCreateInfo& create_info, const std::vector& compute_inputs) - : m_device(dev) + : m_device(dev), m_info(create_info) { init(); load_uniforms(compute_inputs); - - create_pipeline_layout(); - ensure(m_pipeline_layout); - - auto _create_info = create_info; - _create_info.layout = m_pipeline_layout; - CHECK_RESULT(vkCreateComputePipelines(dev, nullptr, 1, &create_info, nullptr, &m_pipeline)); } program::~program() @@ -165,257 +171,352 @@ namespace vk if (m_pipeline_layout) { vkDestroyPipelineLayout(m_device, m_pipeline_layout, nullptr); - vkDestroyDescriptorSetLayout(m_device, m_descriptor_set_layout, nullptr); - vk::get_resource_manager()->dispose(m_descriptor_pool); + + for (auto& set : m_sets) + { + set.destroy(); + } } } program& program::load_uniforms(const std::vector& inputs) { - ensure(!linked); // "Cannot change uniforms in already linked program!" + ensure(!m_linked); // "Cannot change uniforms in already linked program!" 
for (auto &item : inputs) { - uniforms[item.type].push_back(item); + ensure(item.set < binding_set_index_max_enum); // Ensure we have a valid set id + ensure(item.location < 128u || item.type == input_type_push_constant); // Arbitrary limit but useful to catch possibly uninitialized values + m_sets[item.set].m_inputs[item.type].push_back(item); } return *this; } - program& program::link() + program& program::link(bool separate_objects) { - // Preprocess texture bindings - // Link step is only useful for rasterizer programs, compute programs do not need this - for (const auto &uniform : uniforms[program_input_type::input_type_texture]) - { - if (const auto name_start = uniform.name.find("tex"); name_start != umax) - { - const auto name_end = uniform.name.find("_stencil"); - const auto index_start = name_start + 3; // Skip 'tex' part - const auto index_length = (name_end != umax) ? name_end - index_start : name_end; - const auto index_part = uniform.name.substr(index_start, index_length); - const auto index = std::stoi(index_part); + auto p_graphics_info = std::get_if(&m_info); + auto p_compute_info = !p_graphics_info ? std::get_if(&m_info) : nullptr; + const bool is_graphics_pipe = p_graphics_info != nullptr; - if (name_start == 0) + if (!is_graphics_pipe) [[ likely ]] + { + // We only support compute and graphics, so disable this for compute + separate_objects = false; + } + + if (!separate_objects) + { + // Collapse all sets into set 0 if validation passed + auto& sink = m_sets[0]; + for (auto& set : m_sets) + { + for (auto& type_arr : set.m_inputs) { - // Fragment texture (tex...) 
- if (name_end == umax) + if (type_arr.empty()) { - // Normal texture - fs_texture_bindings[index] = uniform.location; - } - else - { - // Stencil mirror - fs_texture_mirror_bindings[index] = uniform.location; + continue; } + + auto type = type_arr.front().type; + auto& dst = sink.m_inputs[type]; + dst.insert(dst.end(), type_arr.begin(), type_arr.end()); + + // Clear + type_arr.clear(); } - else + } + + sink.validate(); + sink.init(m_device); + } + else + { + for (auto& set : m_sets) + { + for (auto& type_arr : set.m_inputs) { - // Vertex texture (vtex...) - vs_texture_bindings[index] = uniform.location; + if (type_arr.empty()) + { + continue; + } + + // Real set + set.validate(); + set.init(m_device); + break; } } } - linked = true; + create_pipeline_layout(); + ensure(m_pipeline_layout); + + if (is_graphics_pipe) + { + VkGraphicsPipelineCreateInfo create_info = *p_graphics_info; + create_info.layout = m_pipeline_layout; + CHECK_RESULT(vkCreateGraphicsPipelines(m_device, nullptr, 1, &create_info, nullptr, &m_pipeline)); + } + else + { + VkComputePipelineCreateInfo create_info = *p_compute_info; + create_info.layout = m_pipeline_layout; + CHECK_RESULT(vkCreateComputePipelines(m_device, nullptr, 1, &create_info, nullptr, &m_pipeline)); + } + + m_linked = true; return *this; } bool program::has_uniform(program_input_type type, const std::string& uniform_name) { - const auto& uniform = uniforms[type]; - return std::any_of(uniform.cbegin(), uniform.cend(), [&uniform_name](const auto& u) + for (auto& set : m_sets) { - return u.name == uniform_name; - }); - } - - u32 program::get_uniform_location(program_input_type type, const std::string& uniform_name) - { - const auto& uniform = uniforms[type]; - const auto result = std::find_if(uniform.cbegin(), uniform.cend(), [&uniform_name](const auto& u) - { - return u.name == uniform_name; - }); - - if (result == uniform.end()) - { - return { umax }; - } - - return result->location; - } - - void program::bind_uniform(const 
VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorType type) - { - for (const auto &uniform : uniforms[program_input_type::input_type_texture]) - { - if (uniform.name == uniform_name) + const auto& uniform = set.m_inputs[type]; + return std::any_of(uniform.cbegin(), uniform.cend(), [&uniform_name](const auto& u) { - if (m_descriptor_slots[uniform.location].matches(image_descriptor)) - { - return; - } + return u.name == uniform_name; + }); + } + } - next_descriptor_set(); - m_descriptor_set.push(image_descriptor, type, uniform.location); - m_descriptors_dirty[uniform.location] = false; - return; + std::pair program::get_uniform_location(::glsl::program_domain domain, program_input_type type, const std::string& uniform_name) + { + for (unsigned i = 0; i < ::size32(m_sets); ++i) + { + const auto& type_arr = m_sets[i].m_inputs[type]; + const auto result = std::find_if(type_arr.cbegin(), type_arr.cend(), [&](const auto& u) + { + return u.domain == domain && u.name == uniform_name; + }); + + if (result != type_arr.end()) + { + return { i, result->location }; } } - rsx_log.notice("texture not found in program: %s", uniform_name.c_str()); + return { umax, umax }; } - void program::bind_uniform(const VkDescriptorImageInfo & image_descriptor, int texture_unit, ::glsl::program_domain domain, bool is_stencil_mirror) + void program::bind_uniform(const VkDescriptorImageInfo& image_descriptor, u32 set_id, u32 binding_point) { - ensure(domain != ::glsl::program_domain::glsl_compute_program); - - u32 binding; - if (domain == ::glsl::program_domain::glsl_fragment_program) - { - binding = (is_stencil_mirror) ? fs_texture_mirror_bindings[texture_unit] : fs_texture_bindings[texture_unit]; - } - else - { - binding = vs_texture_bindings[texture_unit]; - } - - if (binding == ~0u) [[ unlikely ]] - { - rsx_log.notice("texture not found in program: %stex%u", (domain == ::glsl::program_domain::glsl_vertex_program) ? 
"v" : "", texture_unit); - return; - } - - if (m_descriptor_slots[binding].matches(image_descriptor)) + if (m_sets[set_id].m_descriptor_slots[binding_point] == image_descriptor) { return; } - next_descriptor_set(); - m_descriptor_set.push(image_descriptor, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, binding); - m_descriptors_dirty[binding] = false; + m_sets[set_id].notify_descriptor_slot_updated(binding_point, image_descriptor); } - void program::bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point) + void program::bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 set_id, u32 binding_point) { - bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); - } - - void program::bind_uniform(const VkBufferView &buffer_view, u32 binding_point) - { - if (m_descriptor_slots[binding_point].matches(buffer_view)) + if (m_sets[set_id].m_descriptor_slots[binding_point] == buffer_descriptor) { return; } - next_descriptor_set(); - m_descriptor_set.push(buffer_view, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, binding_point); - m_descriptors_dirty[binding_point] = false; + m_sets[set_id].notify_descriptor_slot_updated(binding_point, buffer_descriptor); } - void program::bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name) + void program::bind_uniform(const VkBufferView &buffer_view, u32 set_id, u32 binding_point) { - for (const auto &uniform : uniforms[type]) - { - if (uniform.name == binding_name) - { - bind_uniform(buffer_view, uniform.location); - return; - } - } - - rsx_log.notice("vertex buffer not found in program: %s", binding_name.c_str()); - } - - void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type) - { - if (m_descriptor_slots[binding_point].matches(buffer_descriptor)) + if (m_sets[set_id].m_descriptor_slots[binding_point] == buffer_view) { return; } - next_descriptor_set(); - 
m_descriptor_set.push(buffer_descriptor, type, binding_point); - m_descriptors_dirty[binding_point] = false; + m_sets[set_id].notify_descriptor_slot_updated(binding_point, buffer_view); } - void program::bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 binding_point) + void program::bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 set_id, u32 binding_point) { - // FIXME: Unoptimized... - bool match = true; + auto& set = m_sets[set_id]; for (int i = 0; i < count; ++i) { - if (!m_descriptor_slots[binding_point + i].matches(image_descriptors[i])) + if (set.m_descriptor_slots[binding_point + i] != image_descriptors[i]) + { + set.notify_descriptor_slot_updated(binding_point + i, image_descriptors[i]); + } + } + } + + void program::create_pipeline_layout() + { + ensure(!m_linked); + ensure(m_pipeline_layout == VK_NULL_HANDLE); + + rsx::simple_array push_constants{}; + rsx::simple_array set_layouts{}; + + for (auto& set : m_sets) + { + if (!set.m_device) { - match = false; break; } + + set.next_descriptor_set(); // Initializes the set layout and allocates first set + set_layouts.push_back(set.m_descriptor_set_layout); + + for (const auto& input : set.m_inputs[input_type_push_constant]) + { + const auto& range = input.as_push_constant(); + push_constants.push_back({ + .stageFlags = to_shader_stage_flags(input.domain), + .offset = range.offset, + .size = range.size + }); + } } - if (match) + VkPipelineLayoutCreateInfo create_info + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .flags = 0, + .setLayoutCount = set_layouts.size(), + .pSetLayouts = set_layouts.data(), + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data() + }; + CHECK_RESULT(vkCreatePipelineLayout(m_device, &create_info, nullptr, &m_pipeline_layout)); + } + + program& program::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point) 
+ { + VkDescriptorSet bind_sets[binding_set_index_max_enum]; + unsigned count = 0; + + for (auto& set : m_sets) + { + if (!set.m_device) + { + break; + } + + bind_sets[count++] = set.m_descriptor_set.value(); // Current set pointer for binding + set.next_descriptor_set(); // Flush queue and update pointers + } + + vkCmdBindPipeline(cmd, bind_point, m_pipeline); + vkCmdBindDescriptorSets(cmd, bind_point, m_pipeline_layout, 0, count, bind_sets, 0, nullptr); + return *this; + } + + void descriptor_table_t::destroy() + { + if (!m_device) { return; } - next_descriptor_set(); - m_descriptor_set.push(image_descriptors, static_cast(count), type, binding_point); - - for (int i = 0; i < count; ++i) - { - m_descriptors_dirty[binding_point] = false; - } + vkDestroyDescriptorSetLayout(m_device, m_descriptor_set_layout, nullptr); + vk::get_resource_manager()->dispose(m_descriptor_pool); } - VkDescriptorSet program::allocate_descriptor_set() + void descriptor_table_t::init(VkDevice dev) + { + m_device = dev; + + size_t bind_slots_count = 0; + for (auto& type_arr : m_inputs) + { + if (type_arr.empty() || type_arr.front().type == input_type_push_constant) + { + continue; + } + + bind_slots_count += type_arr.size(); + } + + m_descriptor_slots.resize(bind_slots_count); + std::memset(m_descriptor_slots.data(), 0, sizeof(descriptor_slot_t) * bind_slots_count); + + m_descriptors_dirty.resize(bind_slots_count); + std::fill(m_descriptors_dirty.begin(), m_descriptors_dirty.end(), false); + } + + VkDescriptorSet descriptor_table_t::allocate_descriptor_set() { if (!m_descriptor_pool) { create_descriptor_pool(); + create_descriptor_set_layout(); } return m_descriptor_pool->allocate(m_descriptor_set_layout); } - void program::next_descriptor_set() + void descriptor_table_t::next_descriptor_set() { - const auto new_set = allocate_descriptor_set(); - const auto old_set = m_descriptor_set.value(); - - if (old_set) + if (!m_descriptor_set) { - m_copy_cmds.clear(); - for (unsigned i = 0; i < 
m_copy_cmds.size(); ++i) - { - if (!m_descriptors_dirty[i]) - { - continue; - } - - // Reuse already initialized memory. Each command is the same anyway. - m_copy_cmds.resize(m_copy_cmds.size() + 1); - auto& cmd = m_copy_cmds.back(); - cmd.srcBinding = cmd.dstBinding = i; - cmd.srcSet = old_set; - cmd.dstSet = new_set; - } - - m_descriptor_set.push(m_copy_cmds); + m_descriptor_set = allocate_descriptor_set(); + std::fill(m_descriptors_dirty.begin(), m_descriptors_dirty.end(), false); + return; } - m_descriptor_set = allocate_descriptor_set(); + // Check if we need to actually open a new set + if (!m_any_descriptors_dirty) + { + return; + } + + auto old_set = m_descriptor_set.value(); + auto new_set = allocate_descriptor_set(); + + auto push_descriptor_slot = [this](unsigned idx) + { + const auto& slot = m_descriptor_slots[idx]; + const VkDescriptorType type = m_descriptor_types[idx]; + if (auto ptr = std::get_if(&slot)) + { + m_descriptor_set.push(*ptr, type, idx); + return; + } + + if (auto ptr = std::get_if(&slot)) + { + m_descriptor_set.push(*ptr, type, idx); + return; + } + + if (auto ptr = std::get_if(&slot)) + { + m_descriptor_set.push(*ptr, type, idx); + return; + } + + fmt::throw_exception("Unexpected descriptor structure at index %u", idx); + }; + + m_copy_cmds.clear(); + for (unsigned i = 0; i < m_descriptor_slots.size(); ++i) + { + if (m_descriptors_dirty[i]) + { + // Push + push_descriptor_slot(i); + m_descriptors_dirty[i] = false; + continue; + } + + m_copy_cmds.push_back({ + .sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, + .srcSet = old_set, + .srcBinding = i, + .dstSet = new_set, + .dstBinding = i, + .descriptorCount = 1 + }); + } + + m_descriptor_set.push(m_copy_cmds); // Write previous state + m_descriptor_set = new_set; } - program& program::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point) - { - VkDescriptorSet set = m_descriptor_set.value(); - vkCmdBindPipeline(cmd, bind_point, m_pipeline); - vkCmdBindDescriptorSets(cmd, 
bind_point, m_pipeline_layout, 0, 1, &set, 0, nullptr); - return *this; - } - - void program::create_descriptor_set_layout() + void descriptor_table_t::create_descriptor_set_layout() { ensure(m_descriptor_set_layout == VK_NULL_HANDLE); @@ -425,7 +526,7 @@ namespace vk m_descriptor_pool_sizes.clear(); m_descriptor_pool_sizes.reserve(input_type_max_enum); - for (const auto& type_arr : uniforms) + for (const auto& type_arr : m_inputs) { if (type_arr.empty() || type_arr.front().type == input_type_push_constant) { @@ -445,6 +546,13 @@ namespace vk .stageFlags = to_shader_stage_flags(input.domain) }; bindings.push_back(binding); + + if (m_descriptor_types.size() < (input.location + 1)) + { + m_descriptor_types.resize((input.location + 1)); + } + + m_descriptor_types[input.location] = type; m_descriptor_pool_sizes.back().descriptorCount++; } } @@ -459,38 +567,31 @@ namespace vk CHECK_RESULT(vkCreateDescriptorSetLayout(m_device, &set_layout_create_info, nullptr, &m_descriptor_set_layout)); } - void program::create_pipeline_layout() + void descriptor_table_t::create_descriptor_pool() { - ensure(!linked); - ensure(m_pipeline_layout == VK_NULL_HANDLE); - - create_descriptor_set_layout(); - - rsx::simple_array push_constants{}; - for (const auto& input : uniforms[input_type_push_constant]) - { - const auto& range = input.as_push_constant(); - push_constants.push_back({ .offset = range.offset, .size = range.size }); - } - - VkPipelineLayoutCreateInfo create_info - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = &m_descriptor_set_layout, - .pushConstantRangeCount = ::size32(push_constants), - .pPushConstantRanges = push_constants.data() - }; - CHECK_RESULT(vkCreatePipelineLayout(m_device, &create_info, nullptr, &m_pipeline_layout)); - } - - void program::create_descriptor_pool() - { - ensure(linked); - m_descriptor_pool = std::make_unique(); m_descriptor_pool->create(*vk::get_current_renderer(), 
m_descriptor_pool_sizes); } + + void descriptor_table_t::validate() const + { + // Check for overlapping locations + std::set taken_locations; + + for (auto& type_arr : m_inputs) + { + if (type_arr.empty() || + type_arr.front().type == input_type_push_constant) + { + continue; + } + + for (const auto& input : type_arr) + { + ensure(taken_locations.find(input.location) == taken_locations.end(), "Overlapping input locations found."); + taken_locations.insert(input.location); + } + } + } } } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index 0b3e8ed7d1..96940a8efe 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -54,7 +54,8 @@ namespace vk using bound_data_t = std::variant; bound_data_t bound_data; - u32 location; + u32 set = 0; + u32 location = umax; std::string name; inline bound_buffer& as_buffer() { return *std::get_if(&bound_data); } @@ -69,6 +70,7 @@ namespace vk ::glsl::program_domain domain, const std::string& name, program_input_type type, + u32 set, u32 location, const bound_data_t& data = bound_buffer{}) { @@ -77,38 +79,13 @@ namespace vk .domain = domain, .type = type, .bound_data = data, + .set = set, .location = location, .name = name }; } }; - union descriptor_slot_t - { - VkDescriptorImageInfo image_info; - VkDescriptorBufferInfo buffer_info; - VkBufferView buffer_view; - - bool matches(const VkDescriptorImageInfo& test) const - { - return test.imageView == image_info.imageView && - test.sampler == image_info.sampler && - test.imageLayout == image_info.imageLayout; - } - - bool matches(const VkDescriptorBufferInfo& test) const - { - return test.buffer == buffer_info.buffer && - test.offset == buffer_info.offset && - test.range == buffer_info.range; - } - - bool matches(VkBufferView test) const - { - return test == buffer_view; - } - }; - class shader { ::glsl::program_domain type = ::glsl::program_domain::glsl_vertex_program; @@ -132,37 +109,71 @@ namespace vk 
VkShaderModule get_handle() const; }; - class program + using descriptor_slot_t = std::variant; + + struct descriptor_table_t { - std::array, input_type_max_enum> uniforms; VkDevice m_device = VK_NULL_HANDLE; - - VkPipeline m_pipeline = VK_NULL_HANDLE; - VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; - - std::array fs_texture_bindings; - std::array fs_texture_mirror_bindings; - std::array vs_texture_bindings; - bool linked = false; + std::array, input_type_max_enum> m_inputs; std::unique_ptr m_descriptor_pool; VkDescriptorSetLayout m_descriptor_set_layout = VK_NULL_HANDLE; vk::descriptor_set m_descriptor_set{}; rsx::simple_array m_descriptor_pool_sizes; + rsx::simple_array m_descriptor_types; std::vector m_descriptor_slots; std::vector m_descriptors_dirty; rsx::simple_array m_copy_cmds; + bool m_any_descriptors_dirty = false; - void init(); + void init(VkDevice dev); + void destroy(); + + void validate() const; void create_descriptor_set_layout(); - void create_pipeline_layout(); void create_descriptor_pool(); VkDescriptorSet allocate_descriptor_set(); void next_descriptor_set(); + template + inline void notify_descriptor_slot_updated(u32 slot, const T& data) + { + m_descriptors_dirty[slot] = true; + m_descriptor_slots[slot] = data; + m_any_descriptors_dirty = true; + } + }; + + enum binding_set_index : u32 + { + // For separate shader objects + binding_set_index_vertex = 0, + binding_set_index_fragment = 1, + + // Aliases + binding_set_index_compute = 0, + binding_set_index_unified = 0, + + // Meta + binding_set_index_max_enum = 2, + }; + + class program + { + VkDevice m_device = VK_NULL_HANDLE; + VkPipeline m_pipeline = VK_NULL_HANDLE; + VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; + + std::variant m_info; + std::array m_sets; + bool m_linked = false; + + void init(); + void create_pipeline_layout(); + program& load_uniforms(const std::vector& inputs); public: @@ -173,20 +184,18 @@ namespace vk program(program&& other) = delete; ~program(); - 
program& link(); + program& link(bool separate_stages); program& bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point); bool has_uniform(program_input_type type, const std::string &uniform_name); - u32 get_uniform_location(program_input_type type, const std::string& uniform_name); + std::pair get_uniform_location(::glsl::program_domain domain, program_input_type type, const std::string& uniform_name); - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorType type); - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, int texture_unit, ::glsl::program_domain domain, bool is_stencil_mirror = false); - void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point); - void bind_uniform(const VkBufferView &buffer_view, u32 binding_point); - void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name); - void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type); + void bind_uniform(const VkDescriptorImageInfo &image_descriptor, u32 set_id, u32 binding_point); + void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 set_id, u32 binding_point); + void bind_uniform(const VkBufferView &buffer_view, u32 set_id, u32 binding_point); + void bind_uniform(const VkBufferView &buffer_view, ::glsl::program_domain domain, program_input_type type, const std::string &binding_name); - void bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 binding_point); + void bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 set_id, u32 binding_point); inline VkPipelineLayout layout() const { return m_pipeline_layout; } inline VkPipeline value() const { return m_pipeline; } diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h index 
2403f5bc59..9bc9e4f532 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.h +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -31,6 +31,7 @@ namespace vk ::glsl::program_domain::glsl_compute_program, "multisampled", glsl::input_type_storage_texture, + 0, 0 ), @@ -38,6 +39,7 @@ namespace vk ::glsl::program_domain::glsl_compute_program, "resolve", glsl::input_type_storage_texture, + 0, 1 ), }; @@ -51,8 +53,8 @@ namespace vk { auto msaa_view = multisampled->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_VIEW_MULTISAMPLED)); auto resolved_view = resolve->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_IDENTITY)); - m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, "multisampled", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); - m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, "resolve", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); + m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, 0, 0); + m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, 0, 1); } void run(const vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image) @@ -116,6 +118,7 @@ namespace vk ::glsl::glsl_fragment_program, "push_constants", glsl::input_type_push_constant, + 0, umax, glsl::push_constant_ref{ .size = 16 } )); diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index da10965be9..e7c4862dbc 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -471,7 +471,7 @@ namespace vk void shader_interpreter::update_fragment_textures(const std::array& sampled_images) { // FIXME: Cannot use m_fragment_textures.start now since each interpreter has its own binding layout - u32 binding = m_current_interpreter->get_uniform_location(glsl::input_type_texture, "texture1D_array"); + auto [set, binding] = 
m_current_interpreter->get_uniform_location(::glsl::glsl_fragment_program, glsl::input_type_texture, "texture1D_array"); if (binding == umax) { return; @@ -480,7 +480,7 @@ namespace vk const VkDescriptorImageInfo* texture_ptr = sampled_images.data(); for (u32 i = 0; i < 4; ++i, ++binding, texture_ptr += 16) { - m_current_interpreter->bind_uniform_array(texture_ptr, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 16, binding); + m_current_interpreter->bind_uniform_array(texture_ptr, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 16, set, binding); } } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 60f33f49c5..b6def63136 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -6,7 +6,6 @@ #include "vkutils/device.h" #include "../Program/GLSLCommon.h" - std::string VKVertexDecompilerThread::getFloatTypeName(usz elementCount) { return glsl::getFloatTypeNameImpl(elementCount); @@ -27,14 +26,57 @@ std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::stri return glsl::compareFunctionImpl(f, Op0, Op1, scalar); } +void VKVertexDecompilerThread::prepareBindingTable() +{ + u32 location = 0; + vk_prog->binding_table.vertex_buffers_location = location; + location += 3; // Persistent verts, volatile and layout data + + vk_prog->binding_table.context_buffer_location = location++; + if (m_device_props.emulate_conditional_rendering) + { + vk_prog->binding_table.cr_pred_buffer_location = location++; + } + + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + const bool is_texture_type = PT.type.starts_with("sampler"); + + for (const ParamItem& PI : PT.items) + { + if (is_texture_type) + { + const int id = vk::get_texture_index(PI.name); + vk_prog->binding_table.vtex_location[id] = location++; + continue; + } + + if (PI.name.starts_with("vc[")) + { + if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)) + { + vk_prog->binding_table.cbuf_location = location++; + 
continue; + } + + vk_prog->binding_table.instanced_lut_buffer_location = location++; + vk_prog->binding_table.instanced_cbuf_location = location++; + continue; + } + } + } +} + void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) { + prepareBindingTable(); + OS << "#version 450\n\n" "#extension GL_ARB_separate_shader_objects : enable\n\n"; OS << - "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n" + "layout(std140, set = 0, binding = " << vk_prog->binding_table.context_buffer_location << " ) uniform VertexContextBuffer\n" "{\n" " mat4 scale_offset_mat;\n" " ivec4 user_clip_enabled[2];\n" @@ -45,13 +87,31 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) " float z_far;\n" "};\n\n"; + vk::glsl::program_input context_input = + { + .domain = glsl::glsl_vertex_program, + .type = vk::glsl::input_type_uniform_buffer, + .location = vk_prog->binding_table.context_buffer_location, + .name = "VertexContextBuffer" + }; + inputs.push_back(context_input); + if (m_device_props.emulate_conditional_rendering) { OS << - "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n" + "layout(std430, set = 0, binding = " << vk_prog->binding_table.cr_pred_buffer_location << ") readonly buffer EXT_Conditional_Rendering\n" "{\n" " uint conditional_rendering_predicate;\n" "};\n\n"; + + vk::glsl::program_input predicate_input = + { + .domain = glsl::glsl_vertex_program, + .type = vk::glsl::input_type_storage_buffer, + .location = vk_prog->binding_table.cr_pred_buffer_location, + .name = "EXT_Conditional_Rendering" + }; + inputs.push_back(predicate_input); } OS << @@ -63,52 +123,50 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) " uint layout_ptr_offset;\n" " uint xform_constants_offset;\n"; + u32 push_constants_size = 5 * sizeof(u32); if (m_device_props.emulate_conditional_rendering) { + push_constants_size += sizeof(u32); OS << " uint conditional_rendering_enabled;\n"; } OS << "};\n\n"; - 
vk::glsl::program_input in; - in.location = m_binding_table.vertex_params_bind_slot; - in.domain = glsl::glsl_vertex_program; - in.name = "VertexContextBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - inputs.push_back(in); + vk::glsl::program_input push_constants = + { + .domain = glsl::glsl_vertex_program, + .type = vk::glsl::input_type_push_constant, + .bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = push_constants_size } + }; + inputs.push_back(push_constants); } void VKVertexDecompilerThread::insertInputs(std::stringstream& OS, const std::vector& /*inputs*/) { - OS << "layout(set=0, binding=5) uniform usamplerBuffer persistent_input_stream;\n"; // Data stream with persistent vertex data (cacheable) - OS << "layout(set=0, binding=6) uniform usamplerBuffer volatile_input_stream;\n"; // Data stream with per-draw data (registers and immediate draw data) - OS << "layout(set=0, binding=7) uniform usamplerBuffer vertex_layout_stream;\n"; // Data stream defining vertex data layout + static const char* input_streams[] = + { + "persistent_input_stream", // Data stream with persistent vertex data (cacheable) + "volatile_input_stream", // Data stream with per-draw data (registers and immediate draw data) + "vertex_layout_stream" // Data stream defining vertex data layout" + }; - vk::glsl::program_input in; - in.location = m_binding_table.vertex_buffers_first_bind_slot; - in.domain = glsl::glsl_vertex_program; - in.name = "persistent_input_stream"; - in.type = vk::glsl::input_type_texel_buffer; - this->inputs.push_back(in); + int location = vk_prog->binding_table.vertex_buffers_location; + for (const auto& stream : input_streams) + { + OS << "layout(set=0, binding=" << location << ") uniform usamplerBuffer " << stream << ";\n"; - in.location = m_binding_table.vertex_buffers_first_bind_slot + 1; - in.domain = glsl::glsl_vertex_program; - in.name = "volatile_input_stream"; - in.type = vk::glsl::input_type_texel_buffer; - this->inputs.push_back(in); - 
- in.location = m_binding_table.vertex_buffers_first_bind_slot + 2; - in.domain = glsl::glsl_vertex_program; - in.name = "vertex_layout_stream"; - in.type = vk::glsl::input_type_texel_buffer; - this->inputs.push_back(in); + vk::glsl::program_input in; + in.location = location++; + in.domain = glsl::glsl_vertex_program; + in.name = stream; + in.type = vk::glsl::input_type_texel_buffer; + this->inputs.push_back(in); + } } void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) { vk::glsl::program_input in; - u32 location = m_binding_table.vertex_textures_first_bind_slot; - for (const ParamType &PT : constants) { for (const ParamItem &PI : PT.items) @@ -117,12 +175,12 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std { if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)) { - OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.vertex_constant_buffers_bind_slot) << ") readonly buffer VertexConstantsBuffer\n"; + OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.cbuf_location << ") readonly buffer VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 vc[];\n"; OS << "};\n\n"; - in.location = m_binding_table.vertex_constant_buffers_bind_slot; + in.location = vk_prog->binding_table.cbuf_location; in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_storage_buffer; @@ -133,26 +191,26 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std else { // 1. 
Bind indirection lookup buffer - OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.instancing_lookup_table_bind_slot) << ") readonly buffer InstancingData\n"; + OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.instanced_lut_buffer_location << ") readonly buffer InstancingData\n"; OS << "{\n"; OS << " int constants_addressing_lookup[];\n"; OS << "};\n\n"; - in.location = m_binding_table.instancing_lookup_table_bind_slot; + in.location = vk_prog->binding_table.instanced_lut_buffer_location; in.domain = glsl::glsl_vertex_program; in.name = "InstancingData"; in.type = vk::glsl::input_type_storage_buffer; inputs.push_back(in); // 2. Bind actual constants buffer - OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.instancing_constants_buffer_slot) << ") readonly buffer VertexConstantsBuffer\n"; + OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.instanced_cbuf_location << ") readonly buffer VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 instanced_constants_array[];\n"; OS << "};\n\n"; OS << "#define CONSTANTS_ARRAY_LENGTH " << (properties.has_indexed_constants ? 
468 : ::size32(m_constant_ids)) << "\n\n"; - in.location = m_binding_table.instancing_constants_buffer_slot; + in.location = vk_prog->binding_table.instanced_cbuf_location; in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_storage_buffer; @@ -166,7 +224,8 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std PT.type == "sampler1D" || PT.type == "sampler3D") { - in.location = location; + const int id = vk::get_texture_index(PI.name); + in.location = vk_prog->binding_table.vtex_location[id]; in.name = PI.name; in.type = vk::glsl::input_type_texture; @@ -190,7 +249,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std } } - OS << "layout(set = 0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set = 0, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; } } } @@ -371,8 +430,6 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) void VKVertexDecompilerThread::Task() { m_device_props.emulate_conditional_rendering = vk::emulate_conditional_rendering(); - m_binding_table = vk::g_render_device->get_pipeline_binding_table(); - m_shader = Decompile(); vk_prog->SetInputs(inputs); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h index 1bb6dfd91c..3422333fc6 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.h +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -15,7 +15,6 @@ struct VKVertexDecompilerThread : public VertexProgramDecompiler std::string &m_shader; std::vector inputs; class VKVertexProgram *vk_prog; - vk::pipeline_binding_table m_binding_table{}; struct { @@ -36,6 +35,8 @@ protected: void insertMainStart(std::stringstream &OS) override; void insertMainEnd(std::stringstream &OS) override; + void prepareBindingTable(); + const RSXVertexProgram &rsx_vertex_program; public: VKVertexDecompilerThread(const RSXVertexProgram &prog, 
std::string& shader, ParamArray&, class VKVertexProgram &dst) @@ -61,6 +62,19 @@ public: vk::glsl::shader shader; std::vector uniforms; + // Quick attribute indices + struct + { + u32 context_buffer_location = umax; // Vertex program context + u32 cr_pred_buffer_location = umax; // Conditional rendering predicate + u32 vertex_buffers_location = umax; // Vertex input streams (3) + u32 cbuf_location = umax; // Vertex program constants register file + u32 instanced_lut_buffer_location = umax; // Instancing redirection table + u32 instanced_cbuf_location = umax; // Instancing constants register file + u32 vtex_location[4]; // Vertex textures (inf) + + } binding_table; + void Decompile(const RSXVertexProgram& prog); void Compile(); void SetInputs(std::vector& inputs); diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp index d0b972765c..01a05bf598 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp @@ -76,6 +76,7 @@ namespace vk ::glsl::program_domain::glsl_compute_program, "InputTexture", vk::glsl::input_type_texture, + 0, 0 ), @@ -83,6 +84,7 @@ namespace vk ::glsl::program_domain::glsl_compute_program, "OutputTexture", vk::glsl::input_type_storage_texture, + 0, 1 ), }; @@ -103,8 +105,8 @@ namespace vk VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK); } - m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, "InputTexture", VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); - m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, m_output_image->image()->current_layout }, "OutputTexture", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); + m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, 0, 0); + m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, 
m_output_image->image()->current_layout }, 0, 1); } void fsr_pass::run(const vk::command_buffer& cmd, vk::viewable_image* src, vk::viewable_image* dst, const size2u& input_size, const size2u& output_size) diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index 7293180e08..9bc57b0987 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -333,11 +333,6 @@ namespace vk return &m_handle; } - VkDescriptorSet descriptor_set::value() const - { - return m_handle; - } - void descriptor_set::push(const VkBufferView& buffer_view, VkDescriptorType type, u32 binding) { m_push_type_mask |= (1ull << type); diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h index 6c61488b6e..9fd0b436a9 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h @@ -94,8 +94,10 @@ namespace vk void swap(descriptor_set& other); descriptor_set& operator = (VkDescriptorSet set); + VkDescriptorSet value() const { return m_handle; } + operator bool() const { return m_handle != VK_NULL_HANDLE; } + VkDescriptorSet* ptr(); - VkDescriptorSet value() const; void push(const VkBufferView& buffer_view, VkDescriptorType type, u32 binding); void push(const VkDescriptorBufferInfo& buffer_info, VkDescriptorType type, u32 binding); void push(const VkDescriptorImageInfo& image_info, VkDescriptorType type, u32 binding); From aac4fbe941c6cebb6a298cb3a3702db08fcc6dca Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 12 Jun 2025 14:35:00 +0300 Subject: [PATCH 04/30] vk: Fix graphical bugs and crashes --- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 21 +++++++++------------ rpcs3/Emu/RSX/VK/VKGSRender.cpp | 6 +++++- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 20 +++++++++++++------- rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 1 + rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 11 ++++------- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 3 ++- 6 files changed, 34 
insertions(+), 28 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 25f4297dee..dc5ff8640f 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -176,7 +176,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) { for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (PT.type.starts_with("sampler1D")) + if (!PT.type.starts_with("sampler")) { continue; } @@ -219,7 +219,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) ); inputs.push_back(in); - OS << "layout(set=0, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set=1, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; if (properties.redirected_sampler_mask & mask) { @@ -228,7 +228,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) in.location = vk_prog->binding_table.ftex_stencil_location[id]; inputs.push_back(in); - OS << "layout(set=0, binding=" << in.location << ") uniform u" << samplerType << " " << in.name << ";\n"; + OS << "layout(set=1, binding=" << in.location << ") uniform u" << samplerType << " " << in.name << ";\n"; } } } @@ -236,7 +236,7 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) std::string constants_block; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (PT.type.starts_with("sampler1D")) + if (PT.type.starts_with("sampler")) { continue; } @@ -249,13 +249,13 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) if (!constants_block.empty()) { - OS << "layout(std140, set = 1, binding = " << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n"; OS << "{\n"; OS << constants_block; OS << "};\n\n"; } - OS << 
"layout(std140, set = 1, binding = " << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n"; OS << "{\n"; OS << " float fog_param0;\n"; OS << " float fog_param1;\n"; @@ -267,12 +267,12 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << " float wpos_bias;\n"; OS << "};\n\n"; - OS << "layout(std140, set = 1, binding = " << vk_prog->binding_table.tex_param_location << ") uniform TextureParametersBuffer\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.tex_param_location << ") uniform TextureParametersBuffer\n"; OS << "{\n"; OS << " sampler_info texture_parameters[16];\n"; OS << "};\n\n"; - OS << "layout(std140, set = 1, binding = " << vk_prog->binding_table.polygon_stipple_params_location << ") uniform RasterizerHeap\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.polygon_stipple_params_location << ") uniform RasterizerHeap\n"; OS << "{\n"; OS << " uvec4 stipple_pattern[8];\n"; OS << "};\n\n"; @@ -484,10 +484,7 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog) { for (const ParamItem& PI : PT.items) { - if (PT.type == "sampler1D" || - PT.type == "sampler2D" || - PT.type == "sampler3D" || - PT.type == "samplerCube") + if (PT.type.starts_with("sampler")) continue; usz offset = atoi(PI.name.c_str() + 2); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index d64551f7e9..c1d560a637 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -2078,11 +2078,15 @@ void VKGSRender::load_program_env() const auto& fs_binding_table = m_fragment_prog->binding_table; m_program->bind_uniform(m_vertex_env_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.context_buffer_location); - m_program->bind_uniform(m_vertex_constants_buffer_info, 
vk::glsl::binding_set_index_vertex, vs_binding_table.cbuf_location); m_program->bind_uniform(m_fragment_env_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.context_buffer_location); m_program->bind_uniform(m_fragment_texture_params_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.tex_param_location); m_program->bind_uniform(m_raster_env_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.polygon_stipple_params_location); + if (vs_binding_table.cbuf_location != umax) + { + m_program->bind_uniform(m_vertex_constants_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.cbuf_location); + } + if (m_shader_interpreter.is_interpreter(m_program)) { m_program->bind_uniform(m_vertex_instructions_buffer_info, vk::glsl::binding_set_index_vertex, m_shader_interpreter.get_vertex_instruction_location()); diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 5ac4bd9c26..27caf615d2 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -211,6 +211,11 @@ namespace vk auto& sink = m_sets[0]; for (auto& set : m_sets) { + if (&set == &sink) + { + continue; + } + for (auto& type_arr : set.m_inputs) { if (type_arr.empty()) @@ -279,6 +284,8 @@ namespace vk return u.name == uniform_name; }); } + + return false; } std::pair program::get_uniform_location(::glsl::program_domain domain, program_input_type type, const std::string& uniform_name) @@ -441,8 +448,8 @@ namespace vk { if (!m_descriptor_pool) { - create_descriptor_pool(); create_descriptor_set_layout(); + create_descriptor_pool(); } return m_descriptor_pool->allocate(m_descriptor_set_layout); @@ -463,9 +470,6 @@ namespace vk return; } - auto old_set = m_descriptor_set.value(); - auto new_set = allocate_descriptor_set(); - auto push_descriptor_slot = [this](unsigned idx) { const auto& slot = m_descriptor_slots[idx]; @@ -504,16 +508,18 @@ namespace vk m_copy_cmds.push_back({ .sType 
= VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, - .srcSet = old_set, + .srcSet = m_previous_set, .srcBinding = i, - .dstSet = new_set, + .dstSet = m_descriptor_set.value(), .dstBinding = i, .descriptorCount = 1 }); } m_descriptor_set.push(m_copy_cmds); // Write previous state - m_descriptor_set = new_set; + + m_previous_set = m_descriptor_set.value(); + m_descriptor_set = allocate_descriptor_set(); } void descriptor_table_t::create_descriptor_set_layout() diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index 96940a8efe..785a7f9529 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -121,6 +121,7 @@ namespace vk vk::descriptor_set m_descriptor_set{}; rsx::simple_array m_descriptor_pool_sizes; rsx::simple_array m_descriptor_types; + VkDescriptorSet m_previous_set = VK_NULL_HANDLE; std::vector m_descriptor_slots; std::vector m_descriptors_dirty; diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index b6def63136..a45eb0e2bc 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -76,7 +76,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) "#extension GL_ARB_separate_shader_objects : enable\n\n"; OS << - "layout(std140, set = 0, binding = " << vk_prog->binding_table.context_buffer_location << " ) uniform VertexContextBuffer\n" + "layout(std140, set=0, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform VertexContextBuffer\n" "{\n" " mat4 scale_offset_mat;\n" " ivec4 user_clip_enabled[2];\n" @@ -99,7 +99,7 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) if (m_device_props.emulate_conditional_rendering) { OS << - "layout(std430, set = 0, binding = " << vk_prog->binding_table.cr_pred_buffer_location << ") readonly buffer EXT_Conditional_Rendering\n" + "layout(std430, set=0, binding=" << vk_prog->binding_table.cr_pred_buffer_location << ") readonly buffer 
EXT_Conditional_Rendering\n" "{\n" " uint conditional_rendering_predicate;\n" "};\n\n"; @@ -219,10 +219,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std } } - if (PT.type == "sampler2D" || - PT.type == "samplerCube" || - PT.type == "sampler1D" || - PT.type == "sampler3D") + if (PT.type.starts_with("sampler")) { const int id = vk::get_texture_index(PI.name); in.location = vk_prog->binding_table.vtex_location[id]; @@ -249,7 +246,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std } } - OS << "layout(set = 0, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set=0, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; } } } diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index 9bc57b0987..c8673d4517 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -435,7 +435,8 @@ namespace vk void descriptor_set::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout) { - if ((m_push_type_mask & ~m_update_after_bind_mask) || (m_pending_writes.size() >= max_cache_size)) + if ((m_push_type_mask & ~m_update_after_bind_mask) || + (m_pending_writes.size() >= max_cache_size)) { flush(); } From 20b54f308687b66f5a93ac55e9844ad13131c715 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 12 Jun 2025 14:58:03 +0300 Subject: [PATCH 05/30] vk: Correctly initialize descriptor copy data --- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 27caf615d2..fb8450a166 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -496,6 +496,8 @@ namespace vk }; m_copy_cmds.clear(); + rsx::flags32_t type_mask = 0u; + for (unsigned i = 0; i < 
m_descriptor_slots.size(); ++i) { if (m_descriptors_dirty[i]) @@ -514,9 +516,11 @@ namespace vk .dstBinding = i, .descriptorCount = 1 }); + + type_mask |= (1u << m_descriptor_types[i]); } - m_descriptor_set.push(m_copy_cmds); // Write previous state + m_descriptor_set.push(m_copy_cmds, type_mask); // Write previous state m_previous_set = m_descriptor_set.value(); m_descriptor_set = allocate_descriptor_set(); From 64866098e732ed2cfa548796b7934d670ed6e335 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 12 Jun 2025 23:00:41 +0300 Subject: [PATCH 06/30] vk: Respect shader compile flags when linking --- rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp | 29 ++++++++++++++----------- rpcs3/Emu/RSX/VK/VKPipelineCompiler.h | 18 ++++++++++++--- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp index 884841ec4b..26e0b64098 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp @@ -36,12 +36,12 @@ namespace vk { if (job.is_graphics_job) { - auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.inputs, {}); + auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.inputs, {}, job.flags); job.callback_func(compiled); } else { - auto compiled = int_compile_compute_pipe(job.compute_data, job.inputs); + auto compiled = int_compile_compute_pipe(job.compute_data, job.inputs, job.flags); job.callback_func(compiled); } } @@ -52,20 +52,22 @@ namespace vk std::unique_ptr pipe_compiler::int_compile_compute_pipe( const VkComputePipelineCreateInfo& create_info, - const std::vector& cs_inputs) + const std::vector& cs_inputs, + op_flags flags) { auto program = std::make_unique(*m_device, create_info, cs_inputs); - program->link(false); + program->link(flags & SEPARATE_SHADER_OBJECTS); return program; } std::unique_ptr pipe_compiler::int_compile_graphics_pipe( const VkGraphicsPipelineCreateInfo& 
create_info, const std::vector& vs_inputs, - const std::vector& fs_inputs) + const std::vector& fs_inputs, + op_flags flags) { auto program = std::make_unique(*m_device, create_info, vs_inputs, fs_inputs); - program->link(true); + program->link(flags & SEPARATE_SHADER_OBJECTS); return program; } @@ -73,7 +75,8 @@ namespace vk const vk::pipeline_props &create_info, VkShaderModule modules[2], const std::vector& vs_inputs, - const std::vector& fs_inputs) + const std::vector& fs_inputs, + op_flags flags) { VkPipelineShaderStageCreateInfo shader_stages[2] = {}; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -167,7 +170,7 @@ namespace vk info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = vk::get_renderpass(*m_device, create_info.renderpass_key); - return int_compile_graphics_pipe(info, vs_inputs, fs_inputs); + return int_compile_graphics_pipe(info, vs_inputs, fs_inputs, flags); } std::unique_ptr pipe_compiler::compile( @@ -177,10 +180,10 @@ namespace vk { if (flags & COMPILE_INLINE) { - return int_compile_compute_pipe(create_info, cs_inputs); + return int_compile_compute_pipe(create_info, cs_inputs, flags); } - m_work_queue.push(create_info, cs_inputs, callback); + m_work_queue.push(create_info, cs_inputs, flags, callback); return {}; } @@ -192,7 +195,7 @@ namespace vk { // It is very inefficient to defer this as all pointers need to be saved ensure(flags & COMPILE_INLINE); - return int_compile_graphics_pipe(create_info, vs_inputs, fs_inputs); + return int_compile_graphics_pipe(create_info, vs_inputs, fs_inputs, flags); } std::unique_ptr pipe_compiler::compile( @@ -206,10 +209,10 @@ namespace vk VkShaderModule modules[] = { vs, fs }; if (flags & COMPILE_INLINE) { - return int_compile_graphics_pipe(create_info, modules, vs_inputs, fs_inputs); + return int_compile_graphics_pipe(create_info, modules, vs_inputs, fs_inputs, flags); } - m_work_queue.push(create_info, modules, vs_inputs, fs_inputs, callback); + 
m_work_queue.push(create_info, modules, vs_inputs, fs_inputs, flags, callback); return {}; } diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h index 25c0b8e1c0..762e8aadfc 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h @@ -117,11 +117,14 @@ namespace vk VkShaderModule graphics_modules[2]; std::vector inputs; + op_flags flags; + pipe_compiler_job( const vk::pipeline_props& props, VkShaderModule modules[2], const std::vector& vs_in, const std::vector& fs_in, + op_flags flags_, callback_t func) { callback_func = func; @@ -129,6 +132,7 @@ namespace vk graphics_modules[0] = modules[0]; graphics_modules[1] = modules[1]; is_graphics_job = true; + flags = flags_; inputs.reserve(vs_in.size() + fs_in.size()); inputs.insert(inputs.end(), vs_in.begin(), vs_in.end()); @@ -138,11 +142,16 @@ namespace vk pipe_compiler_job( const VkComputePipelineCreateInfo& props, const std::vector& cs_in, + op_flags flags_, callback_t func) { callback_func = func; compute_data = props; is_graphics_job = false; + flags = flags_; + + graphics_modules[0] = VK_NULL_HANDLE; + graphics_modules[1] = VK_NULL_HANDLE; inputs = cs_in; } @@ -153,18 +162,21 @@ namespace vk std::unique_ptr int_compile_compute_pipe( const VkComputePipelineCreateInfo& create_info, - const std::vector& cs_inputs); + const std::vector& cs_inputs, + op_flags flags); std::unique_ptr int_compile_graphics_pipe( const VkGraphicsPipelineCreateInfo& create_info, const std::vector& vs_inputs, - const std::vector& fs_inputs); + const std::vector& fs_inputs, + op_flags flags); std::unique_ptr int_compile_graphics_pipe( const vk::pipeline_props &create_info, VkShaderModule modules[2], const std::vector& vs_inputs, - const std::vector& fs_inputs); + const std::vector& fs_inputs, + op_flags flags); }; void initialize_pipe_compiler(int num_worker_threads = -1); From ae74aa336f7f220a921d8796ac3d0daeda913e85 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Thu, 12 
Jun 2025 23:21:14 +0300 Subject: [PATCH 07/30] vk: Use write commands instead of copy commands to avoid dependencies --- rpcs3/Emu/RSX/VK/VKProgramBuffer.h | 4 +++- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 12 ++++++------ rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 3 +-- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 17 +++++++++++++++++ rpcs3/Emu/RSX/VK/vkutils/descriptors.h | 1 + 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 647b21adc4..3e4ee000df 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -48,7 +48,9 @@ namespace vk bool compile_async, std::function callback) { - const auto compiler_flags = compile_async ? vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE; + vk::pipe_compiler::op_flags compiler_flags = compile_async ? vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE; + compiler_flags |= vk::pipe_compiler::SEPARATE_SHADER_OBJECTS; + auto compiler = vk::get_pipe_compiler(); auto result = compiler->compile( pipelineProperties, diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index fb8450a166..8ad89bc4a9 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -509,20 +509,20 @@ namespace vk } m_copy_cmds.push_back({ - .sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, - .srcSet = m_previous_set, - .srcBinding = i, + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = m_descriptor_set.value(), .dstBinding = i, - .descriptorCount = 1 + .descriptorCount = 1, + .descriptorType = m_descriptor_types[i], + .pImageInfo = std::get_if(&m_descriptor_slots[i]), + .pBufferInfo = std::get_if(&m_descriptor_slots[i]), + .pTexelBufferView = std::get_if(&m_descriptor_slots[i]) }); type_mask |= (1u << m_descriptor_types[i]); } m_descriptor_set.push(m_copy_cmds, type_mask); // Write previous state - - m_previous_set = 
m_descriptor_set.value(); m_descriptor_set = allocate_descriptor_set(); } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index 785a7f9529..d0f06e2a80 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -121,11 +121,10 @@ namespace vk vk::descriptor_set m_descriptor_set{}; rsx::simple_array m_descriptor_pool_sizes; rsx::simple_array m_descriptor_types; - VkDescriptorSet m_previous_set = VK_NULL_HANDLE; std::vector m_descriptor_slots; std::vector m_descriptors_dirty; - rsx::simple_array m_copy_cmds; + rsx::simple_array m_copy_cmds; bool m_any_descriptors_dirty = false; void init(VkDevice dev); diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index c8673d4517..5a2ad92dab 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -422,6 +422,23 @@ namespace vk } } + void descriptor_set::push(rsx::simple_array& write_cmds, u32 type_mask) + { + m_push_type_mask |= type_mask; + + if (m_pending_writes.empty()) [[unlikely]] + { + m_pending_writes = std::move(write_cmds); + } + else + { + const auto old_size = m_pending_writes.size(); + const auto new_size = write_cmds.size() + old_size; + m_pending_writes.resize(new_size); + std::copy(write_cmds.begin(), write_cmds.end(), m_pending_writes.begin() + old_size); + } + } + void descriptor_set::push(const descriptor_set_dynamic_offset_t& offset) { ensure(offset.location >= 0 && offset.location <= 16); diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h index 9fd0b436a9..c3e67302ae 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h @@ -103,6 +103,7 @@ namespace vk void push(const VkDescriptorImageInfo& image_info, VkDescriptorType type, u32 binding); void push(const VkDescriptorImageInfo* image_info, u32 count, VkDescriptorType type, u32 binding); void 
push(rsx::simple_array& copy_cmd, u32 type_mask = umax); + void push(rsx::simple_array& write_cmds, u32 type_mask = umax); void push(const descriptor_set_dynamic_offset_t& offset); void bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout); From 93e6aa63100311de368cf27b490261250f0238b0 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 13 Jun 2025 00:51:58 +0300 Subject: [PATCH 08/30] vk: Fix FSR upscaling --- rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp index 01a05bf598..589defd3e8 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp @@ -61,6 +61,9 @@ namespace vk // Fill with 0 to avoid sending incomplete/unused variables to the GPU memset(m_constants_buf, 0, sizeof(m_constants_buf)); + // No ssbo usage + ssbo_count = 0; + // Enable push constants use_push_constants = true; push_constants_size = push_constants_size_; @@ -116,6 +119,11 @@ namespace vk m_input_size = input_size; m_output_size = output_size; + if (!m_program) + { + load_program(cmd); + } + configure(cmd); constexpr auto wg_size = 16; From 2ae9753d7944c5f1dc721c278c13d3f92ece1a75 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 13 Jun 2025 13:15:22 +0300 Subject: [PATCH 09/30] vk: Lazy register/derigeter of hot data --- rpcs3/Emu/RSX/VK/VKCompute.cpp | 5 +++ rpcs3/Emu/RSX/VK/VKCompute.h | 5 +++ rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 3 +- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 50 +++++++++++++----------- rpcs3/Emu/RSX/VK/vkutils/descriptors.h | 1 + 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp index c164edddd7..6f676e60c9 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.cpp +++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp @@ -250,6 +250,11 @@ namespace vk void cs_shuffle_base::set_parameters(const 
vk::command_buffer& cmd, const u32* params, u8 count) { + if (!m_program) + { + load_program(cmd); + } + ensure(use_push_constants); vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, count * 4, params); } diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index fa053afe50..9ffeb1ca7f 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -451,6 +451,11 @@ namespace vk void set_parameters(const vk::command_buffer& cmd) { + if (!m_program) + { + load_program(cmd); + } + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, params.data); } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 8ad89bc4a9..c03a9e7b00 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -402,7 +402,8 @@ namespace vk break; } - bind_sets[count++] = set.m_descriptor_set.value(); // Current set pointer for binding + bind_sets[count++] = set.m_descriptor_set.value(); // Current set pointer for binding + set.m_descriptor_set.on_bind(); // Notify async queue set.next_descriptor_set(); // Flush queue and update pointers } diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index 5a2ad92dab..ebe94fd8d3 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -14,44 +14,37 @@ namespace vk public: inline void flush_all() { + reader_lock lock(m_notifications_lock); + for (auto& set : m_notification_list) { set->flush(); } + + m_notification_list.clear(); } void register_(descriptor_set* set) { - // Rare event, upon creation of a new set tracker. 
- // Check for spurious 'new' events when the aux context is taking over - for (const auto& set_ : m_notification_list) - { - if (set_ == set) return; - } + std::lock_guard lock(m_notifications_lock); m_notification_list.push_back(set); - rsx_log.warning("[descriptor_manager::register] Now monitoring %u descriptor sets", m_notification_list.size()); + // rsx_log.notice("[descriptor_manager::register] Now monitoring %u descriptor sets", m_notification_list.size()); } void deregister(descriptor_set* set) { - for (auto it = m_notification_list.begin(); it != m_notification_list.end(); ++it) - { - if (*it == set) - { - *it = m_notification_list.back(); - m_notification_list.pop_back(); - break; - } - } + std::lock_guard lock(m_notifications_lock); - rsx_log.warning("[descriptor_manager::deregister] Now monitoring %u descriptor sets", m_notification_list.size()); + m_notification_list.erase_if(FN(x == set)); + // rsx_log.notice("[descriptor_manager::deregister] Now monitoring %u descriptor sets", m_notification_list.size()); } dispatch_manager() = default; private: rsx::simple_array m_notification_list; + shared_mutex m_notifications_lock; dispatch_manager(const dispatch_manager&) = delete; dispatch_manager& operator = (const dispatch_manager&) = delete; @@ -295,11 +288,6 @@ namespace vk m_in_use = true; m_update_after_bind_mask = g_render_device->get_descriptor_update_after_bind_support(); - - if (m_update_after_bind_mask) - { - g_fxo->get().register_(this); - } } else if (m_push_type_mask & ~m_update_after_bind_mask) { @@ -450,13 +438,29 @@ namespace vk m_dynamic_offsets[offset.location] = offset.value; } - void descriptor_set::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout) + void descriptor_set::on_bind() { + if (!m_push_type_mask) + { + return; + } + + // We have queued writes if ((m_push_type_mask & ~m_update_after_bind_mask) || (m_pending_writes.size() >= max_cache_size)) { flush(); } + else if 
(m_update_after_bind_mask) + { + // Register for async flush + g_fxo->get().register_(this); + } + } + + void descriptor_set::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout) + { + on_bind(); vkCmdBindDescriptorSets(cmd, bind_point, layout, 0, 1, &m_handle, ::size32(m_dynamic_offsets), m_dynamic_offsets.data()); } diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h index c3e67302ae..556fe5d0b9 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h @@ -106,6 +106,7 @@ namespace vk void push(rsx::simple_array& write_cmds, u32 type_mask = umax); void push(const descriptor_set_dynamic_offset_t& offset); + void on_bind(); void bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout); void flush(); From 2c8c788d8156bc179ab31010d2970c21e4eeca73 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 13 Jun 2025 13:44:46 +0300 Subject: [PATCH 10/30] vk: Use standard C++ --- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index ebe94fd8d3..d8e8fcd7ff 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -14,7 +14,7 @@ namespace vk public: inline void flush_all() { - reader_lock lock(m_notifications_lock); + std::shared_lock lock(m_notifications_lock); for (auto& set : m_notification_list) { @@ -44,7 +44,7 @@ namespace vk private: rsx::simple_array m_notification_list; - shared_mutex m_notifications_lock; + std::shared_mutex m_notifications_lock; dispatch_manager(const dispatch_manager&) = delete; dispatch_manager& operator = (const dispatch_manager&) = delete; From f241825c730b0284159a06db58dd56dd6e4530d0 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 15 Jun 2025 14:21:25 +0300 Subject: [PATCH 11/30] vk: Update binding model 
for compute jobs --- rpcs3/Emu/RSX/Common/simple_array.hpp | 5 +++ rpcs3/Emu/RSX/VK/VKCompute.cpp | 23 +++++++------- rpcs3/Emu/RSX/VK/VKCompute.h | 33 ++++++++++---------- rpcs3/Emu/RSX/VK/VKResolveHelper.h | 2 +- rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp | 2 +- rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h | 2 +- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 22 +++++++------ 7 files changed, 47 insertions(+), 42 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index dfec324eeb..06e0b1870d 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -305,6 +305,11 @@ namespace rsx return _size * sizeof(Ty); } + u32 size_bytes32() const + { + return _size * sizeof(Ty); + } + u32 capacity() const { return _capacity; diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp index 6f676e60c9..9d91773988 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.cpp +++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp @@ -132,7 +132,7 @@ namespace vk m_program = compiler->compile(create_info, vk::pipe_compiler::COMPILE_INLINE, {}, get_inputs()); } - bind_resources(); + bind_resources(cmd); m_program->bind(cmd, VK_PIPELINE_BIND_POINT_COMPUTE); } @@ -243,20 +243,19 @@ namespace vk m_src += suffix; } - void cs_shuffle_base::bind_resources() + void cs_shuffle_base::bind_resources(const vk::command_buffer& cmd) { + set_parameters(cmd); m_program->bind_uniform({ m_data->value, m_data_offset, m_data_length }, 0, 0); } - void cs_shuffle_base::set_parameters(const vk::command_buffer& cmd, const u32* params, u8 count) + void cs_shuffle_base::set_parameters(const vk::command_buffer& cmd) { - if (!m_program) + if (!m_params.empty()) { - load_program(cmd); + ensure(use_push_constants); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, m_params.size_bytes32(), m_params.data()); } - - ensure(use_push_constants); - vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 
0, count * 4, params); } void cs_shuffle_base::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_length, u32 data_offset) @@ -294,15 +293,15 @@ namespace vk " uint stencil_offset;\n"; } - void cs_interleave_task::bind_resources() + void cs_interleave_task::bind_resources(const vk::command_buffer& cmd) { + set_parameters(cmd); m_program->bind_uniform({ m_data->value, m_data_offset, m_ssbo_length }, 0, 0); } void cs_interleave_task::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset) { - u32 parameters[4] = { data_length, zeta_offset - data_offset, stencil_offset - data_offset, 0 }; - set_parameters(cmd, parameters, 4); + m_params = { data_length, zeta_offset - data_offset, stencil_offset - data_offset, 0 }; ensure(stencil_offset > data_offset); m_ssbo_length = stencil_offset + (data_length / 4) - data_offset; @@ -354,7 +353,7 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void cs_aggregator::bind_resources() + void cs_aggregator::bind_resources(const vk::command_buffer& cmd) { m_program->bind_uniform({ src->value, 0, block_length }, 0, 0); m_program->bind_uniform({ dst->value, 0, 4 }, 0, 1); diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 9ffeb1ca7f..0dfa80f4f5 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -35,7 +35,7 @@ namespace vk void destroy(); virtual std::vector get_inputs(); - virtual void bind_resources() {} + virtual void bind_resources(const vk::command_buffer& cmd) {} void load_program(const vk::command_buffer& cmd); @@ -50,6 +50,8 @@ namespace vk u32 m_data_length = 0; u32 kernel_size = 1; + rsx::simple_array m_params; + std::string variables, work_kernel, loop_advance, suffix; std::string method_declarations; @@ -57,9 +59,9 @@ namespace vk void build(const char* function_name, u32 _kernel_size = 0); - void bind_resources() override; + void bind_resources(const 
vk::command_buffer& cmd) override; - void set_parameters(const vk::command_buffer& cmd, const u32* params, u8 count); + void set_parameters(const vk::command_buffer& cmd); void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_length, u32 data_offset = 0); }; @@ -125,7 +127,7 @@ namespace vk cs_interleave_task(); - void bind_resources() override; + void bind_resources(const vk::command_buffer& cmd) override; void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset); }; @@ -342,8 +344,9 @@ namespace vk cs_shuffle_base::build(""); } - void bind_resources() override + void bind_resources(const vk::command_buffer& cmd) override { + set_parameters(cmd); m_program->bind_uniform({ m_data->value, m_data_offset, m_ssbo_length }, 0, 0); } @@ -361,8 +364,7 @@ namespace vk data_offset = src_offset; } - u32 parameters[4] = { src_length, src_offset - data_offset, dst_offset - data_offset, 0 }; - set_parameters(cmd, parameters, 4); + m_params = { src_length, src_offset - data_offset, dst_offset - data_offset, 0 }; cs_shuffle_base::run(cmd, data, src_length, data_offset); } }; @@ -443,19 +445,16 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void bind_resources() override + void bind_resources(const vk::command_buffer& cmd) override { + set_parameters(cmd); + m_program->bind_uniform({ src_buffer->value, in_offset, block_length }, 0, 0); m_program->bind_uniform({ dst_buffer->value, out_offset, block_length }, 0, 1); } void set_parameters(const vk::command_buffer& cmd) { - if (!m_program) - { - load_program(cmd); - } - vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, params.data); } @@ -475,7 +474,6 @@ namespace vk params.logw = rsx::ceil_log2(width); params.logh = rsx::ceil_log2(height); params.logd = rsx::ceil_log2(depth); - set_parameters(cmd); const u32 num_bytes_per_invocation = (sizeof(_BlockType) * 
optimal_group_size); const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); @@ -492,7 +490,7 @@ namespace vk cs_aggregator(); - void bind_resources() override; + void bind_resources(const vk::command_buffer& cmd) override; void run(const vk::command_buffer& cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words); }; @@ -576,8 +574,10 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void bind_resources() override + void bind_resources(const vk::command_buffer& cmd) override { + set_parameters(cmd); + const auto op = static_cast(Op); m_program->bind_uniform({ src_buffer->value, in_offset, in_block_length }, 0u, 0u ^ op); m_program->bind_uniform({ dst_buffer->value, out_offset, out_block_length }, 0u, 1u ^ op); @@ -648,7 +648,6 @@ namespace vk params.image_height = (Op == RSX_detiler_op::decode) ? tile_aligned_height : config.image_height; params.image_pitch = config.image_pitch; params.image_bpp = config.image_bpp; - set_parameters(cmd); const u32 subtexels_per_invocation = (config.image_bpp < 4) ? 
(4 / config.image_bpp) : 1; const u32 virtual_width = config.image_width / subtexels_per_invocation; diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h index 9bc9e4f532..a9064eff95 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.h +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -49,7 +49,7 @@ namespace vk return result; } - void bind_resources() override + void bind_resources(const vk::command_buffer& cmd) override { auto msaa_view = multisampled->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_VIEW_MULTISAMPLED)); auto resolved_view = resolve->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_IDENTITY)); diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp index 589defd3e8..23a4733c60 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp @@ -97,7 +97,7 @@ namespace vk return result; } - void fsr_pass::bind_resources() + void fsr_pass::bind_resources(const vk::command_buffer& cmd) { // Bind relevant stuff if (!m_sampler) diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h b/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h index 6d9b15d72a..7bff58b049 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h @@ -20,7 +20,7 @@ namespace vk u32 m_constants_buf[20]; std::vector get_inputs() override; - void bind_resources() override; + void bind_resources(const vk::command_buffer&) override; virtual void configure(const vk::command_buffer& cmd) = 0; diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index d8e8fcd7ff..a49dc66301 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -14,7 +14,7 @@ namespace vk public: inline void flush_all() { - std::shared_lock lock(m_notifications_lock); + std::lock_guard lock(m_notifications_lock); for (auto& set : m_notification_list) { @@ -44,7 +44,7 @@ namespace 
vk private: rsx::simple_array m_notification_list; - std::shared_mutex m_notifications_lock; + std::mutex m_notifications_lock; dispatch_manager(const dispatch_manager&) = delete; dispatch_manager& operator = (const dispatch_manager&) = delete; @@ -81,17 +81,17 @@ namespace vk } else { - binding_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT; + binding_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT; } } - binding_infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + binding_infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; binding_infos.pNext = nullptr; binding_infos.bindingCount = ::size32(binding_flags); binding_infos.pBindingFlags = binding_flags.data(); infos.pNext = &binding_infos; - infos.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT; + infos.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT; VkDescriptorSetLayout result; CHECK_RESULT(vkCreateDescriptorSetLayout(*g_render_device, &infos, nullptr, &result)); @@ -442,6 +442,7 @@ namespace vk { if (!m_push_type_mask) { + ensure(m_pending_writes.empty()); return; } @@ -450,16 +451,17 @@ namespace vk (m_pending_writes.size() >= max_cache_size)) { flush(); + return; } - else if (m_update_after_bind_mask) - { - // Register for async flush - g_fxo->get().register_(this); - } + + // Register for async flush + ensure(m_update_after_bind_mask); + g_fxo->get().register_(this); } void descriptor_set::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout) { + // Notify on_bind(); vkCmdBindDescriptorSets(cmd, bind_point, layout, 0, 1, &m_handle, ::size32(m_dynamic_offsets), m_dynamic_offsets.data()); From ffa835efac9edfc6e8ad91c5f74aeb1fde54dc1f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 15 Jun 2025 14:22:04 +0300 Subject: [PATCH 12/30] vk: Use shared layout generator for all pipelines - Common code applying flags uniformly fixes bugs with misconfigured 
options --- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index c03a9e7b00..d7bdfcbb36 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -568,14 +568,7 @@ namespace vk } } - VkDescriptorSetLayoutCreateInfo set_layout_create_info - { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .flags = 0, - .bindingCount = ::size32(bindings), - .pBindings = bindings.data() - }; - CHECK_RESULT(vkCreateDescriptorSetLayout(m_device, &set_layout_create_info, nullptr, &m_descriptor_set_layout)); + m_descriptor_set_layout = vk::descriptors::create_layout(bindings); } void descriptor_table_t::create_descriptor_pool() From 3a65359d59285f95ddc18a532a4f47e87ed71bb9 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 15 Jun 2025 15:01:45 +0300 Subject: [PATCH 13/30] vk: Fix clang build and resource leak on exit --- rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp | 3 ++- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 4 ++++ rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 5 ++++- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 13 +++++++++++++ rpcs3/Emu/RSX/VK/vkutils/descriptors.h | 3 ++- 5 files changed, 25 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 1191561625..790421e474 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -46,9 +46,10 @@ namespace vk #define IS_DIGIT(x) (x >= '0' && x <= '9') constexpr int max_index_length = 2; + const int name_length = static_cast(name.length()); std::string index; - for (int char_idx = name.length() - max_index_length; char_idx < name.length(); ++char_idx) + for (int char_idx = name_length - max_index_length; char_idx < name_length; ++char_idx) { if (IS_DIGIT(name[char_idx])) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp 
b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index c1d560a637..4506c870ec 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1,3 +1,4 @@ +#include "Emu/RSX/VK/vkutils/descriptors.h" #include "stdafx.h" #include "../Overlays/overlay_compile_notification.h" #include "../Overlays/Shaders/shader_loading_dialog_native.h" @@ -854,6 +855,9 @@ VKGSRender::~VKGSRender() m_command_buffer_pool.destroy(); m_secondary_command_buffer_pool.destroy(); + // Descriptors + vk::descriptors::flush(); + // Global resources vk::destroy_global_resources(); diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index d7bdfcbb36..b5bc7642ba 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -420,7 +420,10 @@ namespace vk } vkDestroyDescriptorSetLayout(m_device, m_descriptor_set_layout, nullptr); - vk::get_resource_manager()->dispose(m_descriptor_pool); + m_descriptor_pool->destroy(); + + m_descriptor_pool.reset(); + m_device = VK_NULL_HANDLE; } void descriptor_table_t::init(VkDevice dev) diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index a49dc66301..9e5880a7ee 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -40,6 +40,12 @@ namespace vk // rsx_log.notice("[descriptor_manager::deregister] Now monitoring %u descriptor sets", m_notification_list.size()); } + void destroy() + { + std::lock_guard lock(m_notifications_lock); + m_notification_list.clear(); + } + dispatch_manager() = default; private: @@ -60,6 +66,11 @@ namespace vk g_fxo->get().flush_all(); } + void destroy() + { + g_fxo->get().destroy(); + } + VkDescriptorSetLayout create_layout(const rsx::simple_array& bindings) { VkDescriptorSetLayoutCreateInfo infos = {}; @@ -414,11 +425,13 @@ namespace vk { m_push_type_mask |= type_mask; +#if !defined(__clang__) || (__clang_major__ >= 16) if (m_pending_writes.empty()) 
[[unlikely]] { m_pending_writes = std::move(write_cmds); } else +#endif { const auto old_size = m_pending_writes.size(); const auto new_size = write_cmds.size() + old_size; diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h index 556fe5d0b9..c2cf5deb20 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h @@ -122,7 +122,7 @@ namespace vk rsx::simple_array m_image_info_pool; rsx::simple_array m_dynamic_offsets; -#ifdef __clang__ +#if defined(__clang__) && (__clang_major__ < 16) // Clang (pre 16.x) does not support LWG 2089, std::construct_at for POD types struct WriteDescriptorSetT : public VkWriteDescriptorSet { @@ -162,6 +162,7 @@ namespace vk { void init(); void flush(); + void destroy(); VkDescriptorSetLayout create_layout(const rsx::simple_array& bindings); } From bb1c0a5eee4a31c1ded5b37c1e3a41fd7068b06c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 15 Jun 2025 22:04:57 +0300 Subject: [PATCH 14/30] rsx/util: Support basic array merge --- rpcs3/Emu/RSX/Common/simple_array.hpp | 7 +++++++ rpcs3/tests/test_simple_array.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 06e0b1870d..033994547d 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -285,6 +285,13 @@ namespace rsx return pos; } + void operator += (const rsx::simple_array& that) + { + const auto old_size = _size; + resize(_size + that._size); + std::memcpy(data() + old_size, that.data(), that.size_bytes()); + } + void clear() { _size = 0; diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index f64e01200e..916284a6cd 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -189,4 +189,29 @@ namespace rsx EXPECT_EQ(arr[i], i + 1); } } + + TEST(SimpleArray, Merge) + { + rsx::simple_array arr{ 1 }; + 
rsx::simple_array arr2{ 2, 3, 4, 5, 6, 7, 8, 9 }; + rsx::simple_array arr3{ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }; + + // Check small vector optimization + EXPECT_TRUE(arr.is_local_storage()); + + // Small vector optimization holds after append + arr += arr2; + EXPECT_TRUE(arr.is_local_storage()); + + // Exceed the boundary and we move into dynamic alloc + arr += arr3; + EXPECT_FALSE(arr.is_local_storage()); + + // Verify contents + EXPECT_EQ(arr.size(), 30); + for (int i = 0; i < 30; ++i) + { + EXPECT_EQ(arr[i], i + 1); + } + } } From 5417d4854df208cb7791b62cacdf8ab0daab8340 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 15 Jun 2025 22:05:36 +0300 Subject: [PATCH 15/30] vk: Fix edge cases in descriptor update logic --- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 6 +++--- rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index b5bc7642ba..892c217f6c 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -399,12 +399,11 @@ namespace vk { if (!set.m_device) { - break; + continue; } bind_sets[count++] = set.m_descriptor_set.value(); // Current set pointer for binding - set.m_descriptor_set.on_bind(); // Notify async queue - set.next_descriptor_set(); // Flush queue and update pointers + set.on_bind(); // Notify bind event. Internally updates handles and triggers flushing. 
} vkCmdBindPipeline(cmd, bind_point, m_pipeline); @@ -528,6 +527,7 @@ namespace vk m_descriptor_set.push(m_copy_cmds, type_mask); // Write previous state m_descriptor_set = allocate_descriptor_set(); + m_any_descriptors_dirty = false; } void descriptor_table_t::create_descriptor_set_layout() diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index d0f06e2a80..36653facf7 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -138,6 +138,12 @@ namespace vk VkDescriptorSet allocate_descriptor_set(); void next_descriptor_set(); + inline void on_bind() + { + next_descriptor_set(); // Enqueue changes and update pointers + m_descriptor_set.on_bind(); // Notify async queue to flush any pending changes + } + template inline void notify_descriptor_slot_updated(u32 slot, const T& data) { From 15791cf94eea005166fe8a7cdf74a0344d3a0a7b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 15 Jun 2025 22:56:32 +0300 Subject: [PATCH 16/30] vk: Fix descriptor set update and caching model to support skipped updates --- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 31 +++++++++++++++--------- rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 8 +----- rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp | 19 ++++----------- 3 files changed, 25 insertions(+), 33 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 892c217f6c..18d658fc42 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -364,7 +364,7 @@ namespace vk break; } - set.next_descriptor_set(); // Initializes the set layout and allocates first set + set.create_descriptor_set_layout(); set_layouts.push_back(set.m_descriptor_set_layout); for (const auto& input : set.m_inputs[input_type_push_constant]) @@ -402,8 +402,7 @@ namespace vk continue; } - bind_sets[count++] = set.m_descriptor_set.value(); // Current set pointer for binding - set.on_bind(); // Notify bind event. 
Internally updates handles and triggers flushing. + bind_sets[count++] = set.commit(); // Commit variable changes and return handle to the new set } vkCmdBindPipeline(cmd, bind_point, m_pipeline); @@ -418,10 +417,17 @@ namespace vk return; } - vkDestroyDescriptorSetLayout(m_device, m_descriptor_set_layout, nullptr); - m_descriptor_pool->destroy(); + if (m_descriptor_set_layout) + { + vkDestroyDescriptorSetLayout(m_device, m_descriptor_set_layout, nullptr); + } + + if (m_descriptor_pool) + { + m_descriptor_pool->destroy(); + m_descriptor_pool.reset(); + } - m_descriptor_pool.reset(); m_device = VK_NULL_HANDLE; } @@ -451,26 +457,24 @@ namespace vk { if (!m_descriptor_pool) { - create_descriptor_set_layout(); create_descriptor_pool(); } return m_descriptor_pool->allocate(m_descriptor_set_layout); } - void descriptor_table_t::next_descriptor_set() + VkDescriptorSet descriptor_table_t::commit() { if (!m_descriptor_set) { - m_descriptor_set = allocate_descriptor_set(); + m_any_descriptors_dirty = true; std::fill(m_descriptors_dirty.begin(), m_descriptors_dirty.end(), false); - return; } // Check if we need to actually open a new set if (!m_any_descriptors_dirty) { - return; + return m_descriptor_set.value(); } auto push_descriptor_slot = [this](unsigned idx) @@ -500,6 +504,7 @@ namespace vk m_copy_cmds.clear(); rsx::flags32_t type_mask = 0u; + m_descriptor_set = allocate_descriptor_set(); for (unsigned i = 0; i < m_descriptor_slots.size(); ++i) { @@ -526,8 +531,10 @@ namespace vk } m_descriptor_set.push(m_copy_cmds, type_mask); // Write previous state - m_descriptor_set = allocate_descriptor_set(); + m_descriptor_set.on_bind(); m_any_descriptors_dirty = false; + + return m_descriptor_set.value(); } void descriptor_table_t::create_descriptor_set_layout() diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index 36653facf7..bc36b936e7 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -136,13 
+136,7 @@ namespace vk void create_descriptor_pool(); VkDescriptorSet allocate_descriptor_set(); - void next_descriptor_set(); - - inline void on_bind() - { - next_descriptor_set(); // Enqueue changes and update pointers - m_descriptor_set.on_bind(); // Notify async queue to flush any pending changes - } + VkDescriptorSet commit(); template inline void notify_descriptor_slot_updated(u32 slot, const T& data) diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index 9e5880a7ee..b5e62f33f6 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -411,14 +411,10 @@ namespace vk if (m_pending_copies.empty()) [[likely]] { m_pending_copies = std::move(copy_cmd); + return; } - else - { - const auto old_size = m_pending_copies.size(); - const auto new_size = copy_cmd.size() + old_size; - m_pending_copies.resize(new_size); - std::copy(copy_cmd.begin(), copy_cmd.end(), m_pending_copies.begin() + old_size); - } + + m_pending_copies += copy_cmd; } void descriptor_set::push(rsx::simple_array& write_cmds, u32 type_mask) @@ -429,15 +425,10 @@ namespace vk if (m_pending_writes.empty()) [[unlikely]] { m_pending_writes = std::move(write_cmds); + return; } - else #endif - { - const auto old_size = m_pending_writes.size(); - const auto new_size = write_cmds.size() + old_size; - m_pending_writes.resize(new_size); - std::copy(write_cmds.begin(), write_cmds.end(), m_pending_writes.begin() + old_size); - } + m_pending_writes += write_cmds; } void descriptor_set::push(const descriptor_set_dynamic_offset_t& offset) From b3492b73ad678aa138df3f3f1e7eec27b3e46d7a Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 15 Jun 2025 23:45:11 +0300 Subject: [PATCH 17/30] vk: Improve pipeline layout validation and fix slot allocation bugs --- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 3 +++ rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 20 ++++++++++++++------ rpcs3/Emu/RSX/VK/VKVertexProgram.cpp | 2 ++ 3 files changed, 19 
insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index dc5ff8640f..7aee3c7d95 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -57,6 +57,9 @@ void VKFragmentDecompilerThread::prepareBindingTable() vk_prog->binding_table.tex_param_location = location++; vk_prog->binding_table.polygon_stipple_params_location = location++; + std::memset(vk_prog->binding_table.ftex_location, 0xff, sizeof(vk_prog->binding_table.ftex_location)); + std::memset(vk_prog->binding_table.ftex_stencil_location, 0xff, sizeof(vk_prog->binding_table.ftex_stencil_location)); + if (has_textures) [[ likely ]] { unsigned num_textures = 0; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 18d658fc42..9f53e62f2e 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -547,6 +547,8 @@ namespace vk m_descriptor_pool_sizes.clear(); m_descriptor_pool_sizes.reserve(input_type_max_enum); + std::unordered_map descriptor_type_map; + for (const auto& type_arr : m_inputs) { if (type_arr.empty() || type_arr.front().type == input_type_push_constant) @@ -568,16 +570,22 @@ namespace vk }; bindings.push_back(binding); - if (m_descriptor_types.size() < (input.location + 1)) - { - m_descriptor_types.resize((input.location + 1)); - } - - m_descriptor_types[input.location] = type; + descriptor_type_map[input.location] = type; m_descriptor_pool_sizes.back().descriptorCount++; } } + m_descriptor_types.resize(::size32(m_descriptors_dirty)); + + for (u32 i = 0; i < ::size32(m_descriptors_dirty); ++i) + { + if (descriptor_type_map.find(i) == descriptor_type_map.end()) + { + fmt::throw_exception("Invalid input structure. 
Some input bindings were not declared!"); + } + m_descriptor_types[i] = descriptor_type_map[i]; + } + m_descriptor_set_layout = vk::descriptors::create_layout(bindings); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index a45eb0e2bc..70d0972984 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -38,6 +38,8 @@ void VKVertexDecompilerThread::prepareBindingTable() vk_prog->binding_table.cr_pred_buffer_location = location++; } + std::memset(vk_prog->binding_table.vtex_location, 0xff, sizeof(vk_prog->binding_table.vtex_location)); + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { const bool is_texture_type = PT.type.starts_with("sampler"); From 91491c7cf388b3da067e1969f4aab839b9f757b7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Jun 2025 03:13:24 +0300 Subject: [PATCH 18/30] vk: Drop copy optimization - The pointer-based nature of write entries, changes invalidate previous data - Instead of managing scratch, just push to the descriptors built-in management which is quite optimal --- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 18 +++--------------- rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 1 - 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 9f53e62f2e..85454481a2 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -502,8 +502,6 @@ namespace vk fmt::throw_exception("Unexpected descriptor structure at index %u", idx); }; - m_copy_cmds.clear(); - rsx::flags32_t type_mask = 0u; m_descriptor_set = allocate_descriptor_set(); for (unsigned i = 0; i < m_descriptor_slots.size(); ++i) @@ -516,21 +514,11 @@ namespace vk continue; } - m_copy_cmds.push_back({ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = m_descriptor_set.value(), - .dstBinding = i, - .descriptorCount = 1, - .descriptorType = m_descriptor_types[i], - 
.pImageInfo = std::get_if(&m_descriptor_slots[i]), - .pBufferInfo = std::get_if(&m_descriptor_slots[i]), - .pTexelBufferView = std::get_if(&m_descriptor_slots[i]) - }); - - type_mask |= (1u << m_descriptor_types[i]); + // We should copy here if possible. + // Without descriptor_buffer, the most efficient option is to just use the normal bind logic due to the pointer-based nature of the descriptor inputs and no stride. + push_descriptor_slot(i); } - m_descriptor_set.push(m_copy_cmds, type_mask); // Write previous state m_descriptor_set.on_bind(); m_any_descriptors_dirty = false; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index bc36b936e7..81c3ff8525 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -124,7 +124,6 @@ namespace vk std::vector m_descriptor_slots; std::vector m_descriptors_dirty; - rsx::simple_array m_copy_cmds; bool m_any_descriptors_dirty = false; void init(VkDevice dev); From cdc78f81f761b43e4baafd3f368893d13f598959 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Jun 2025 14:43:38 +0300 Subject: [PATCH 19/30] vk: Code improvements --- rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp | 6 +----- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 790421e474..815492caa3 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -43,22 +43,18 @@ namespace vk fmt::throw_exception("Invalid texture name: '%s'", name); } -#define IS_DIGIT(x) (x >= '0' && x <= '9') - constexpr int max_index_length = 2; const int name_length = static_cast(name.length()); std::string index; for (int char_idx = name_length - max_index_length; char_idx < name_length; ++char_idx) { - if (IS_DIGIT(name[char_idx])) + if (std::isdigit(name[char_idx])) { index += name[char_idx]; } } -#undef IS_DIGIT - if 
(index.empty()) { fmt::throw_exception("Invalid texture name: '%s'", name); diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 85454481a2..967828655e 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -361,7 +361,7 @@ namespace vk { if (!set.m_device) { - break; + continue; } set.create_descriptor_set_layout(); From 91e22aa4e48a19cd2086182bacb9c5e4f8fdcf8b Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Jun 2025 15:45:56 +0300 Subject: [PATCH 20/30] vk: Fix FS stencil mirror binding location overwrites causing holes in descriptor layout --- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 7aee3c7d95..94012840ec 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -92,7 +92,7 @@ void VKFragmentDecompilerThread::prepareBindingTable() { for (auto& stencil_location : vk_prog->binding_table.ftex_stencil_location) { - if (stencil_location == umax) + if (stencil_location != 0) { continue; } From dd28d100d61b79bd317484f4fd7edc1998df907f Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Jun 2025 16:19:48 +0300 Subject: [PATCH 21/30] vk: Fix crash when running attachment clear pass --- rpcs3/Emu/RSX/VK/VKOverlays.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index 6a74f8e646..743339e26c 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ -43,6 +43,10 @@ namespace vk if (!m_vao.heap) { m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, "overlays VAO", 128); + } + + if (!m_ubo.heap && m_num_uniform_buffers > 0) + { m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, "overlays UBO", 128); } } @@ -704,6 +708,9 @@ namespace vk // Disable samplers 
m_num_usable_samplers = 0; + // Disable UBOs + m_num_uniform_buffers = 0; + renderpass_config.set_depth_mask(false); renderpass_config.set_color_mask(0, true, true, true, true); renderpass_config.set_attachment_count(1); @@ -711,6 +718,7 @@ namespace vk std::vector attachment_clear_pass::get_vertex_inputs() { + check_heap(); return { vk::glsl::program_input::make( From 16a0ae6a7bc46773e8414f661b8e7e3f633691be Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Jun 2025 18:28:35 +0300 Subject: [PATCH 22/30] vk: Update shader interpreter to use dynamic binding layout --- rpcs3/Emu/RSX/Program/ShaderInterpreter.h | 3 +- rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp | 151 ---------- rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h | 7 - rpcs3/Emu/RSX/VK/VKDraw.cpp | 21 +- rpcs3/Emu/RSX/VK/VKFragmentProgram.h | 2 +- rpcs3/Emu/RSX/VK/VKGSRender.cpp | 55 +++- rpcs3/Emu/RSX/VK/VKGSRender.h | 8 + rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 301 ++++++++------------ rpcs3/Emu/RSX/VK/VKShaderInterpreter.h | 23 +- rpcs3/Emu/RSX/VK/vkutils/device.cpp | 12 - rpcs3/Emu/RSX/VK/vkutils/device.h | 3 - 11 files changed, 200 insertions(+), 386 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/ShaderInterpreter.h b/rpcs3/Emu/RSX/Program/ShaderInterpreter.h index 5503a2870c..f89c058dec 100644 --- a/rpcs3/Emu/RSX/Program/ShaderInterpreter.h +++ b/rpcs3/Emu/RSX/Program/ShaderInterpreter.h @@ -20,8 +20,9 @@ namespace program_common COMPILER_OPT_ENABLE_KIL = (1 << 11), COMPILER_OPT_ENABLE_STIPPLING = (1 << 12), COMPILER_OPT_ENABLE_INSTANCING = (1 << 13), + COMPILER_OPT_ENABLE_VTX_TEXTURES = (1 << 14), - COMPILER_OPT_MAX = COMPILER_OPT_ENABLE_INSTANCING + COMPILER_OPT_MAX = COMPILER_OPT_ENABLE_VTX_TEXTURES }; static std::string get_vertex_interpreter() diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp index 602d855d76..76cda4d253 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp @@ -8,157 +8,6 @@ 
namespace vk { - rsx::simple_array get_common_binding_table() - { - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); - rsx::simple_array bindings(binding_table.instancing_constants_buffer_slot + 1); - - u32 idx = 0; - - // Vertex stream, one stream for cacheable data, one stream for transient data - for (int i = 0; i < 3; i++) - { - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.vertex_buffers_first_bind_slot + i; - bindings[idx].pImmutableSamplers = nullptr; - idx++; - } - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.fragment_constant_buffers_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.fragment_state_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.fragment_texture_params_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = 
VK_SHADER_STAGE_ALL_GRAPHICS; - bindings[idx].binding = binding_table.vertex_params_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.conditional_render_predicate_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.rasterizer_env_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.instancing_lookup_table_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.instancing_constants_buffer_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - return bindings; - } - - std::tuple> - get_common_pipeline_layout(VkDevice dev) - { - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); - auto bindings = get_common_binding_table(); - u32 idx = ::size32(bindings); - - bindings.resize(binding_table.total_descriptor_bindings); - - for (auto binding = binding_table.textures_first_bind_slot; - binding < binding_table.vertex_textures_first_bind_slot; - binding++) - { - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding; - 
bindings[idx].pImmutableSamplers = nullptr; - idx++; - } - - for (int i = 0; i < rsx::limits::vertex_textures_count; i++) - { - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.vertex_textures_first_bind_slot + i; - bindings[idx].pImmutableSamplers = nullptr; - idx++; - } - - ensure(idx == binding_table.total_descriptor_bindings); - - std::array push_constants; - push_constants[0].offset = 0; - push_constants[0].size = 20; - push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - - if (vk::emulate_conditional_rendering()) - { - // Conditional render toggle - push_constants[0].size = 24; - } - - const auto set_layout = vk::descriptors::create_layout(bindings); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &set_layout; - layout_info.pushConstantRangeCount = 1; - layout_info.pPushConstantRanges = push_constants.data(); - - VkPipelineLayout result; - CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); - return std::make_tuple(result, set_layout, bindings); - } - rsx::simple_array get_descriptor_pool_sizes(const rsx::simple_array& bindings) { // Compile descriptor pool sizes diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h index 371d0ebf76..e5ada45bf8 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h @@ -5,13 +5,6 @@ namespace vk { - // Grab standard layout for decompiled RSX programs. Also used by the interpreter. - // FIXME: This generates a bloated monstrosity that needs to die. - std::tuple> get_common_pipeline_layout(VkDevice dev); - - // Returns the standard binding layout without texture slots. 
Those have special handling depending on the consumer. - rsx::simple_array get_common_binding_table(); - // Returns an array of pool sizes that can be used to generate a proper descriptor pool rsx::simple_array get_descriptor_pool_sizes(const rsx::simple_array& bindings); } diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 1e96087694..008d51f4bb 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -555,7 +555,7 @@ bool VKGSRender::bind_texture_env() { m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, vk::glsl::binding_set_index_fragment, - m_fragment_prog->binding_table.ftex_location[i]); + m_fs_binding_table->ftex_location[i]); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { @@ -576,7 +576,7 @@ bool VKGSRender::bind_texture_env() m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, vk::glsl::binding_set_index_fragment, - m_fragment_prog->binding_table.ftex_stencil_location[i]); + m_fs_binding_table->ftex_stencil_location[i]); } } else @@ -584,13 +584,13 @@ bool VKGSRender::bind_texture_env() const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, vk::glsl::binding_set_index_fragment, - m_fragment_prog->binding_table.ftex_location[i]); + m_fs_binding_table->ftex_location[i]); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, vk::glsl::binding_set_index_fragment, - m_fragment_prog->binding_table.ftex_stencil_location[i]); + m_fs_binding_table->ftex_stencil_location[i]); } } } @@ -605,7 
+605,7 @@ bool VKGSRender::bind_texture_env() const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, vk::glsl::binding_set_index_vertex, - m_vertex_prog->binding_table.vtex_location[i]); + m_vs_binding_table->vtex_location[i]); continue; } @@ -628,7 +628,7 @@ bool VKGSRender::bind_texture_env() m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, vk::glsl::binding_set_index_vertex, - m_vertex_prog->binding_table.vtex_location[i]); + m_vs_binding_table->vtex_location[i]); continue; } @@ -637,7 +637,7 @@ bool VKGSRender::bind_texture_env() m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, vk::glsl::binding_set_index_vertex, - m_vertex_prog->binding_table.vtex_location[i]); + m_vs_binding_table->vtex_location[i]); } return out_of_memory; @@ -874,10 +874,9 @@ void VKGSRender::emit_geometry(u32 sub_index) ensure(m_vertex_layout_storage); if (update_descriptors) { - const auto& binding_table = m_vertex_prog->binding_table; - m_program->bind_uniform(persistent_buffer, vk::glsl::binding_set_index_vertex, binding_table.vertex_buffers_location); - m_program->bind_uniform(volatile_buffer, vk::glsl::binding_set_index_vertex, binding_table.vertex_buffers_location + 1); - m_program->bind_uniform(m_vertex_layout_storage->value, vk::glsl::binding_set_index_vertex, binding_table.vertex_buffers_location + 2); + m_program->bind_uniform(persistent_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location); + m_program->bind_uniform(volatile_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 1); + m_program->bind_uniform(m_vertex_layout_storage->value, 
vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 2); } bool reload_state = (!m_current_draw.subdraw_id++); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index 049455a866..c51b81b8fc 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -10,7 +10,7 @@ namespace vk class shader_interpreter; } -struct VKFragmentDecompilerThread : public FragmentProgramDecompiler +class VKFragmentDecompilerThread : public FragmentProgramDecompiler { friend class vk::shader_interpreter; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 4506c870ec..b2003f645c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1780,8 +1780,11 @@ bool VKGSRender::load_program() m_program = m_shader_interpreter.get( m_pipeline_properties, current_fp_metadata, + current_vp_metadata, current_vertex_program.ctrl, current_fragment_program.ctrl); + + std::tie(m_vs_binding_table, m_fs_binding_table) = get_binding_table(); return true; } } @@ -1879,6 +1882,7 @@ bool VKGSRender::load_program() m_program = m_shader_interpreter.get( m_pipeline_properties, current_fp_metadata, + current_vp_metadata, current_vertex_program.ctrl, current_fragment_program.ctrl); @@ -1900,6 +1904,16 @@ bool VKGSRender::load_program() } } + if (m_program) + { + std::tie(m_vs_binding_table, m_fs_binding_table) = get_binding_table(); + } + else + { + m_vs_binding_table = nullptr; + m_fs_binding_table = nullptr; + } + return m_program != nullptr; } @@ -1911,13 +1925,14 @@ void VKGSRender::load_program_env() } const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length; + const bool is_interpreter = m_shader_interpreter.is_interpreter(m_program); const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty); const bool update_fragment_constants = !!(m_graphics_state & 
rsx::pipeline_state::fragment_constants_dirty); const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty); const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty); const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty); - const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program)); + const bool update_instruction_buffers = (!!m_interpreter_state && is_interpreter); const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty)); const bool update_instancing_data = rsx::method_registers.current_draw_clause.is_trivial_instanced_draw; @@ -2078,17 +2093,14 @@ void VKGSRender::load_program_env() } } - const auto& vs_binding_table = m_vertex_prog->binding_table; - const auto& fs_binding_table = m_fragment_prog->binding_table; + m_program->bind_uniform(m_vertex_env_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->context_buffer_location); + m_program->bind_uniform(m_fragment_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->context_buffer_location); + m_program->bind_uniform(m_fragment_texture_params_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->tex_param_location); + m_program->bind_uniform(m_raster_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->polygon_stipple_params_location); - m_program->bind_uniform(m_vertex_env_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.context_buffer_location); - m_program->bind_uniform(m_fragment_env_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.context_buffer_location); - m_program->bind_uniform(m_fragment_texture_params_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.tex_param_location); - 
m_program->bind_uniform(m_raster_env_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.polygon_stipple_params_location); - - if (vs_binding_table.cbuf_location != umax) + if (m_vs_binding_table->cbuf_location != umax) { - m_program->bind_uniform(m_vertex_constants_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.cbuf_location); + m_program->bind_uniform(m_vertex_constants_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cbuf_location); } if (m_shader_interpreter.is_interpreter(m_program)) @@ -2096,21 +2108,21 @@ void VKGSRender::load_program_env() m_program->bind_uniform(m_vertex_instructions_buffer_info, vk::glsl::binding_set_index_vertex, m_shader_interpreter.get_vertex_instruction_location()); m_program->bind_uniform(m_fragment_instructions_buffer_info, vk::glsl::binding_set_index_fragment, m_shader_interpreter.get_fragment_instruction_location()); } - else if (fs_binding_table.cbuf_location != umax) + else if (m_fs_binding_table->cbuf_location != umax) { - m_program->bind_uniform(m_fragment_constants_buffer_info, vk::glsl::binding_set_index_fragment, fs_binding_table.cbuf_location); + m_program->bind_uniform(m_fragment_constants_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->cbuf_location); } if (vk::emulate_conditional_rendering()) { auto predicate = m_cond_render_buffer ? 
m_cond_render_buffer->value : vk::get_scratch_buffer(*m_current_command_buffer, 4)->value; - m_program->bind_uniform({ predicate, 0, 4 }, vk::glsl::binding_set_index_vertex, vs_binding_table.cr_pred_buffer_location); + m_program->bind_uniform({ predicate, 0, 4 }, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cr_pred_buffer_location); } if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) { - m_program->bind_uniform(m_instancing_indirection_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.instanced_lut_buffer_location); - m_program->bind_uniform(m_instancing_constants_array_buffer_info, vk::glsl::binding_set_index_vertex, vs_binding_table.instanced_cbuf_location); + m_program->bind_uniform(m_instancing_indirection_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->instanced_lut_buffer_location); + m_program->bind_uniform(m_instancing_constants_array_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->instanced_cbuf_location); } // Clear flags @@ -2137,6 +2149,19 @@ void VKGSRender::load_program_env() m_graphics_state.clear(handled_flags); } +std::pair VKGSRender::get_binding_table() const +{ + ensure(m_program); + + if (!m_shader_interpreter.is_interpreter(m_program)) + { + return { &m_vertex_prog->binding_table, &m_fragment_prog->binding_table }; + } + + const auto& [vs, fs] = m_shader_interpreter.get_shaders(); + return { &vs->binding_table, &fs->binding_table }; +} + bool VKGSRender::is_current_program_interpreted() const { return m_program && m_shader_interpreter.is_interpreter(m_program); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 61dc496402..107da8ebf2 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -26,6 +26,9 @@ using namespace vk::vmm_allocation_pool_; // clang workaround. 
using namespace vk::upscaling_flags_; // ditto +using vs_binding_table_t = decltype(VKVertexProgram::binding_table); +using fs_binding_table_t = decltype(VKFragmentProgram::binding_table); + namespace vk { using host_data_t = rsx::host_gpu_context_t; @@ -53,6 +56,9 @@ private: vk::glsl::program *m_prev_program = nullptr; vk::pipeline_props m_pipeline_properties; + const vs_binding_table_t* m_vs_binding_table = nullptr; + const fs_binding_table_t* m_fs_binding_table = nullptr; + vk::texture_cache m_texture_cache; vk::surface_cache m_rtts; @@ -78,6 +84,8 @@ private: VkDependencyInfoKHR m_async_compute_dependency_info {}; VkMemoryBarrier2KHR m_async_compute_memory_barrier {}; + std::pair get_binding_table() const; + public: //vk::fbo draw_fbo; std::unique_ptr m_vertex_cache; diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index e7c4862dbc..4ab6a0fa3d 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -12,23 +12,70 @@ namespace vk { - glsl::shader* shader_interpreter::build_vs(u64 compiler_options) + u32 shader_interpreter::init(VKVertexProgram* vk_prog, u64 compiler_options) const + { + std::memset(&vk_prog->binding_table, 0xff, sizeof(vk_prog->binding_table)); + + u32 location = 0; + vk_prog->binding_table.vertex_buffers_location = location; + location += 3; + + vk_prog->binding_table.context_buffer_location = location++; + + if (vk::emulate_conditional_rendering()) + { + vk_prog->binding_table.cr_pred_buffer_location = location++; + } + + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) + { + vk_prog->binding_table.instanced_lut_buffer_location = location++; + vk_prog->binding_table.instanced_cbuf_location = location++; + } + else + { + vk_prog->binding_table.cbuf_location = location++; + } + + if (vk::emulate_conditional_rendering()) + { + vk_prog->binding_table.cr_pred_buffer_location = location++; + } + + // Return next index + 
return location; + } + + u32 shader_interpreter::init(VKFragmentProgram* vk_prog, u64 compiler_opt) const + { + std::memset(&vk_prog->binding_table, 0xff, sizeof(vk_prog->binding_table)); + + vk_prog->binding_table.context_buffer_location = 0; + vk_prog->binding_table.tex_param_location = 1; + vk_prog->binding_table.polygon_stipple_params_location = 2; + + // Return next index + return 3; + } + + VKVertexProgram* shader_interpreter::build_vs(u64 compiler_options) { ::glsl::shader_properties properties{}; properties.domain = ::glsl::program_domain::glsl_vertex_program; properties.require_lit_emulation = true; - // TODO: Extend decompiler thread - // TODO: Rename decompiler thread, it no longer spawns a thread RSXVertexProgram null_prog; std::string shader_str; ParamArray arr; - VKVertexProgram vk_prog; + + // Initialize binding layout + auto vk_prog = std::make_unique(); + m_vertex_instruction_start = init(vk_prog.get(), compiler_options); null_prog.ctrl = (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) ? 
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS : 0; - VKVertexDecompilerThread comp(null_prog, shader_str, arr, vk_prog); + VKVertexDecompilerThread comp(null_prog, shader_str, arr, *vk_prog); // Initialize compiler properties comp.properties.has_indexed_constants = true; @@ -52,6 +99,12 @@ namespace vk " uvec4 vp_instructions[];\n" "};\n\n"; + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_VTX_TEXTURES) + { + // FIXME: Unimplemented + rsx_log.todo("Vertex textures are currently not implemented for the shader interpreter."); + } + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) { builder << "#define _ENABLE_INSTANCED_CONSTANTS\n"; @@ -68,48 +121,29 @@ namespace vk builder << program_common::interpreter::get_vertex_interpreter(); const std::string s = builder.str(); - auto vs = std::make_unique(); + auto vs = &vk_prog->shader; vs->create(::glsl::program_domain::glsl_vertex_program, s); vs->compile(); - // Prepare input table - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + // Declare local inputs + auto vs_inputs = comp.get_inputs(); + vk::glsl::program_input in; - - in.location = binding_table.vertex_params_bind_slot; + in.set = 0; in.domain = ::glsl::glsl_vertex_program; - in.name = "VertexContextBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - m_vs_inputs.push_back(in); + in.location = m_vertex_instruction_start; + in.type = glsl::input_type_storage_buffer; + in.name = "VertexInstructionBlock"; + vs_inputs.push_back(in); - in.location = binding_table.vertex_buffers_first_bind_slot; - in.name = "persistent_input_stream"; - in.type = vk::glsl::input_type_texel_buffer; - m_vs_inputs.push_back(in); + vk_prog->SetInputs(vs_inputs); - in.location = binding_table.vertex_buffers_first_bind_slot + 1; - in.name = "volatile_input_stream"; - in.type = vk::glsl::input_type_texel_buffer; - m_vs_inputs.push_back(in); - - in.location = 
binding_table.vertex_buffers_first_bind_slot + 2; - in.name = "vertex_layout_stream"; - in.type = vk::glsl::input_type_texel_buffer; - m_vs_inputs.push_back(in); - - in.location = binding_table.vertex_constant_buffers_bind_slot; - in.name = "VertexConstantsBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - m_vs_inputs.push_back(in); - - // TODO: Bind textures if needed - - auto ret = vs.get(); - m_shader_cache[compiler_options].m_vs = std::move(vs); + auto ret = vk_prog.get(); + m_shader_cache[compiler_options].m_vs = std::move(vk_prog); return ret; } - glsl::shader* shader_interpreter::build_fs(u64 compiler_options) + VKFragmentProgram* shader_interpreter::build_fs(u64 compiler_options) { [[maybe_unused]] ::glsl::shader_properties properties{}; properties.domain = ::glsl::program_domain::glsl_fragment_program; @@ -120,10 +154,13 @@ namespace vk ParamArray arr; std::string shader_str; RSXFragmentProgram frag; - VKFragmentProgram vk_prog; - VKFragmentDecompilerThread comp(shader_str, arr, frag, len, vk_prog); - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + auto vk_prog = std::make_unique(); + m_fragment_instruction_start = init(vk_prog.get(), compiler_options); + m_fragment_textures_start = m_fragment_instruction_start + 1; + + VKFragmentDecompilerThread comp(shader_str, arr, frag, len, *vk_prog); + std::stringstream builder; builder << "#version 450\n" @@ -199,7 +236,7 @@ namespace vk for (int i = 0, bind_location = m_fragment_textures_start; i < 4; ++i) { - builder << "layout(set=0, binding=" << bind_location++ << ") " << "uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n"; + builder << "layout(set=1, binding=" << bind_location++ << ") " << "uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n"; } builder << "\n" @@ -211,7 +248,7 @@ namespace vk } builder << - "layout(std430, binding=" << m_fragment_instruction_start << ") readonly restrict buffer FragmentInstructionBlock\n" + 
"layout(std430, set=1, binding=" << m_fragment_instruction_start << ") readonly restrict buffer FragmentInstructionBlock\n" "{\n" " uint shader_control;\n" " uint texture_control;\n" @@ -223,140 +260,35 @@ namespace vk builder << program_common::interpreter::get_fragment_interpreter(); const std::string s = builder.str(); - auto fs = std::make_unique(); + auto fs = &vk_prog->shader; fs->create(::glsl::program_domain::glsl_fragment_program, s); fs->compile(); - // Prepare input table + // Declare local inputs + auto inputs = comp.get_inputs(); + vk::glsl::program_input in; - in.location = binding_table.fragment_constant_buffers_bind_slot; + in.set = 1; in.domain = ::glsl::glsl_fragment_program; - in.name = "FragmentConstantsBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - m_fs_inputs.push_back(in); - - in.location = binding_table.fragment_state_bind_slot; - in.name = "FragmentStateBuffer"; - m_fs_inputs.push_back(in); - - in.location = binding_table.fragment_texture_params_bind_slot; - in.name = "TextureParametersBuffer"; - m_fs_inputs.push_back(in); + in.location = m_fragment_instruction_start; + in.type = glsl::input_type_storage_buffer; + in.name = "FragmentInstructionBlock"; + inputs.push_back(in); for (int i = 0, location = m_fragment_textures_start; i < 4; ++i, ++location) { in.location = location; in.name = std::string(type_names[i]) + "_array[16]"; - m_fs_inputs.push_back(in); + in.type = glsl::input_type_texture; + inputs.push_back(in); } - auto ret = fs.get(); - m_shader_cache[compiler_options].m_fs = std::move(fs); + vk_prog->SetInputs(inputs); + + auto ret = vk_prog.get(); + m_shader_cache[compiler_options].m_fs = std::move(vk_prog); return ret; } -/* - std::pair shader_interpreter::create_layout(VkDevice dev) - { - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); - auto bindings = get_common_binding_table(); - u32 idx = ::size32(bindings); - - bindings.resize(binding_table.total_descriptor_bindings); - - 
// Texture 1D array - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 16; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - m_fragment_textures_start = bindings[idx].binding; - idx++; - - // Texture 2D array - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 16; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 1; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - // Texture 3D array - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 16; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 2; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - // Texture CUBE array - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 16; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 3; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - // Vertex texture array (2D only) - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 4; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 4; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - // Vertex program ucode block - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 5; - bindings[idx].pImmutableSamplers = nullptr; - - 
m_vertex_instruction_start = bindings[idx].binding; - idx++; - - // Fragment program ucode block - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 6; - bindings[idx].pImmutableSamplers = nullptr; - - m_fragment_instruction_start = bindings[idx].binding; - idx++; - bindings.resize(idx); - - m_descriptor_pool_sizes = get_descriptor_pool_sizes(bindings); - - std::array push_constants; - push_constants[0].offset = 0; - push_constants[0].size = 16; - push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - - if (vk::emulate_conditional_rendering()) - { - // Conditional render toggle - push_constants[0].size = 20; - } - - const auto set_layout = vk::descriptors::create_layout(bindings); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &set_layout; - layout_info.pushConstantRangeCount = 1; - layout_info.pPushConstantRanges = push_constants.data(); - - VkPipelineLayout result; - CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); - return { set_layout, result }; - } -*/ void shader_interpreter::init(const vk::render_device& dev) { @@ -366,19 +298,14 @@ namespace vk void shader_interpreter::destroy() { m_program_cache.clear(); - - for (auto &fs : m_shader_cache) - { - fs.second.m_vs->destroy(); - fs.second.m_fs->destroy(); - } - m_shader_cache.clear(); } glsl::program* shader_interpreter::link(const vk::pipeline_props& properties, u64 compiler_opt) { - glsl::shader *fs, *vs; + VKVertexProgram* vs; + VKFragmentProgram* fs; + if (auto found = m_shader_cache.find(compiler_opt); found != m_shader_cache.end()) { fs = found->second.m_fs.get(); @@ -393,12 +320,12 @@ namespace vk VkPipelineShaderStageCreateInfo shader_stages[2] = {}; 
shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - shader_stages[0].module = vs->get_handle(); + shader_stages[0].module = vs->shader.get_handle(); shader_stages[0].pName = "main"; shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - shader_stages[1].module = fs->get_handle(); + shader_stages[1].module = fs->shader.get_handle(); shader_stages[1].pName = "main"; std::vector dynamic_state_descriptors = @@ -464,7 +391,13 @@ namespace vk info.renderPass = vk::get_renderpass(m_device, properties.renderpass_key); auto compiler = vk::get_pipe_compiler(); - auto program = compiler->compile(info, vk::pipe_compiler::COMPILE_INLINE, {}, m_vs_inputs, m_fs_inputs); + auto program = compiler->compile( + info, + vk::pipe_compiler::COMPILE_INLINE | vk::pipe_compiler::SEPARATE_SHADER_OBJECTS, + {}, + vs->uniforms, + fs->uniforms); + return program.release(); } @@ -486,7 +419,8 @@ namespace vk glsl::program* shader_interpreter::get( const vk::pipeline_props& properties, - const program_hash_util::fragment_program_utils::fragment_program_metadata& metadata, + const program_hash_util::fragment_program_utils::fragment_program_metadata& fp_metadata, + const program_hash_util::vertex_program_utils::vertex_program_metadata& vp_metadata, u32 vp_ctrl, u32 fp_ctrl) { @@ -526,11 +460,12 @@ namespace vk if (fp_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT; if (fp_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT; if (fp_ctrl & RSX_SHADER_CONTROL_USES_KIL) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL; - if (metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; - if 
(metadata.has_branch_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; - if (metadata.has_pack_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; + if (fp_metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; + if (fp_metadata.has_branch_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; + if (fp_metadata.has_pack_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; if (rsx::method_registers.polygon_stipple_enabled()) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_STIPPLING; if (vp_ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING; + if (vp_metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_VTX_TEXTURES; if (m_current_key == key) [[likely]] { @@ -567,4 +502,16 @@ namespace vk { return m_fragment_instruction_start; } + + std::pair shader_interpreter::get_shaders() const + { + if (auto found = m_shader_cache.find(m_current_key.compiler_opt); found != m_shader_cache.end()) + { + auto fs = found->second.m_fs.get(); + auto vs = found->second.m_vs.get(); + return { vs, fs }; + } + + return { nullptr, nullptr }; + } }; diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h index aeaad698fb..9d934b3ffa 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h @@ -5,6 +5,9 @@ #include "vkutils/descriptors.h" #include +class VKVertexProgram; +class VKFragmentProgram; + namespace vk { using ::program_hash_util::fragment_program_utils; @@ -12,9 +15,6 @@ namespace vk class shader_interpreter { - std::vector m_vs_inputs; - std::vector m_fs_inputs; - VkDevice m_device = VK_NULL_HANDLE; glsl::program* m_current_interpreter = nullptr; 
@@ -39,8 +39,8 @@ namespace vk struct shader_cache_entry_t { - std::unique_ptr m_fs; - std::unique_ptr m_vs; + std::unique_ptr m_fs; + std::unique_ptr m_vs; }; std::unordered_map, key_hasher> m_program_cache; @@ -52,20 +52,27 @@ namespace vk pipeline_key m_current_key{}; - glsl::shader* build_vs(u64 compiler_opt); - glsl::shader* build_fs(u64 compiler_opt); + VKVertexProgram* build_vs(u64 compiler_opt); + VKFragmentProgram* build_fs(u64 compiler_opt); glsl::program* link(const vk::pipeline_props& properties, u64 compiler_opt); + u32 init(VKVertexProgram* vk_prog, u64 compiler_opt) const; + u32 init(VKFragmentProgram* vk_prog, u64 compiler_opt) const; + public: void init(const vk::render_device& dev); void destroy(); glsl::program* get( const vk::pipeline_props& properties, - const program_hash_util::fragment_program_utils::fragment_program_metadata& metadata, + const program_hash_util::fragment_program_utils::fragment_program_metadata& fp_metadata, + const program_hash_util::vertex_program_utils::vertex_program_metadata& vp_metadata, u32 vp_ctrl, u32 fp_ctrl); + // Retrieve the shader components that make up the current interpreter + std::pair get_shaders() const; + bool is_interpreter(const glsl::program* prog) const; u32 get_vertex_instruction_location() const; diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp index 85d9148834..907d692e85 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp @@ -813,7 +813,6 @@ namespace vk memory_map = vk::get_memory_mapping(pdev); m_formats_support = vk::get_optimal_tiling_supported_formats(pdev); - m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev); if (g_cfg.video.disable_vulkan_mem_allocator) { @@ -1148,15 +1147,4 @@ namespace vk return result; } - - pipeline_binding_table get_pipeline_binding_table(const vk::physical_device& dev) - { - pipeline_binding_table result{}; - - // Need to check how many samplers are supported by the driver - 
const auto usable_samplers = std::min(dev.get_limits().maxPerStageDescriptorSampledImages, 32u); - result.vertex_textures_first_bind_slot = result.textures_first_bind_slot + usable_samplers; - result.total_descriptor_bindings = result.vertex_textures_first_bind_slot + 4; - return result; - } } diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h index 63e30d3d42..0511802aac 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.h +++ b/rpcs3/Emu/RSX/VK/vkutils/device.h @@ -137,7 +137,6 @@ namespace vk physical_device* pgpu = nullptr; memory_type_mapping memory_map{}; gpu_formats_support m_formats_support{}; - pipeline_binding_table m_pipeline_binding_table{}; std::unique_ptr m_allocator; VkDevice dev = VK_NULL_HANDLE; @@ -168,7 +167,6 @@ namespace vk const physical_device& gpu() const { return *pgpu; } const memory_type_mapping& get_memory_mapping() const { return memory_map; } const gpu_formats_support& get_formats_support() const { return m_formats_support; } - const pipeline_binding_table& get_pipeline_binding_table() const { return m_pipeline_binding_table; } const gpu_shader_types_support& get_shader_types_support() const { return pgpu->shader_types_support; } const custom_border_color_features& get_custom_border_color_support() const { return pgpu->custom_border_color_support; } const multidraw_features get_multidraw_support() const { return pgpu->multidraw_support; } @@ -206,7 +204,6 @@ namespace vk memory_type_mapping get_memory_mapping(const physical_device& dev); gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev); - pipeline_binding_table get_pipeline_binding_table(const physical_device& dev); extern const render_device* g_render_device; } From 396c4bbdd7202f4debd0ae422416b631bd6e218c Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Jun 2025 19:01:38 +0300 Subject: [PATCH 23/30] vk: Drop obsolete logic around descriptor switching --- rpcs3/Emu/RSX/VK/VKDraw.cpp | 30 ------------------------------ 1 
file changed, 30 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 008d51f4bb..ae5348c52f 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -837,36 +837,6 @@ void VKGSRender::emit_geometry(u32 sub_index) vk::clear_status_interrupt(vk::heap_changed); } } - else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) - { - /* - // Need to update descriptors; make a copy for the next draw - VkDescriptorSet previous_set = m_current_frame->descriptor_set.value(); - m_current_frame->descriptor_set.flush(); - m_current_frame->descriptor_set = allocate_descriptor_set(); - rsx::simple_array copy_cmds(binding_table.total_descriptor_bindings); - - for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n) - { - copy_cmds[n] = - { - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, // sType - nullptr, // pNext - previous_set, // srcSet - n, // srcBinding - 0u, // srcArrayElement - m_current_frame->descriptor_set.value(), // dstSet - n, // dstBinding - 0u, // dstArrayElement - 1u // descriptorCount - }; - } - - m_current_frame->descriptor_set.push(copy_cmds); - update_descriptors = true; - */ - fmt::throw_exception("Not implemented"); - } // Update vertex fetch parameters update_vertex_env(sub_index, upload_info); From 5d6b8b20c4ce0afc0410235ac309ee7271a158c4 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 22 Jun 2025 20:01:39 +0300 Subject: [PATCH 24/30] vk: Fix binding of arrays --- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 51 +++++++++++++++++++++--- rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 10 ++++- rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 2 +- 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 967828655e..bd12828e08 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -339,14 +339,23 @@ namespace vk void 
program::bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 set_id, u32 binding_point) { + // Non-caching write auto& set = m_sets[set_id]; + auto& arr = set.m_scratch_images_array; + + descriptor_array_ref_t data + { + .first = arr.size(), + .count = static_cast(count) + }; + + arr.reserve(arr.size() + static_cast(count)); for (int i = 0; i < count; ++i) { - if (set.m_descriptor_slots[binding_point + i] != image_descriptors[i]) - { - set.notify_descriptor_slot_updated(binding_point + i, image_descriptors[i]); - } + arr.push_back(image_descriptors[i]); } + + set.notify_descriptor_slot_updated(binding_point, data); } void program::create_pipeline_layout() @@ -499,6 +508,14 @@ namespace vk return; } + if (auto ptr = std::get_if(&slot)) + { + ensure(type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); // Only type supported at the moment + ensure((ptr->first + ptr->count) <= m_scratch_images_array.size()); + m_descriptor_set.push(m_scratch_images_array.data() + ptr->first, ptr->count, type, idx); + return; + } + fmt::throw_exception("Unexpected descriptor structure at index %u", idx); }; @@ -521,6 +538,7 @@ namespace vk m_descriptor_set.on_bind(); m_any_descriptors_dirty = false; + m_scratch_images_array.clear(); return m_descriptor_set.value(); } @@ -537,6 +555,27 @@ namespace vk std::unordered_map descriptor_type_map; + auto descriptor_count = [](const std::string& name) -> u32 + { + const auto start = name.find_last_of("["); + if (start == std::string::npos) + { + return 1; + } + + const auto end = name.find_last_of("]"); + ensure(end != std::string::npos && start < end, "Invalid variable name"); + + const std::string array_size = name.substr(start + 1, end - start - 1); + if (const auto count = std::atoi(array_size.c_str()); + count > 0) + { + return count; + } + + return 1; + }; + for (const auto& type_arr : m_inputs) { if (type_arr.empty() || type_arr.front().type == input_type_push_constant) @@ -553,13 
+592,13 @@ namespace vk { .binding = input.location, .descriptorType = type, - .descriptorCount = 1, + .descriptorCount = descriptor_count(input.name), .stageFlags = to_shader_stage_flags(input.domain) }; bindings.push_back(binding); descriptor_type_map[input.location] = type; - m_descriptor_pool_sizes.back().descriptorCount++; + m_descriptor_pool_sizes.back().descriptorCount += binding.descriptorCount; } } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index 81c3ff8525..4b38d23fb8 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -109,7 +109,13 @@ namespace vk VkShaderModule get_handle() const; }; - using descriptor_slot_t = std::variant; + struct descriptor_array_ref_t + { + u32 first = 0; + u32 count = 0; + }; + + using descriptor_slot_t = std::variant; struct descriptor_table_t { @@ -126,6 +132,8 @@ namespace vk std::vector m_descriptors_dirty; bool m_any_descriptors_dirty = false; + rsx::simple_array< VkDescriptorImageInfo> m_scratch_images_array; + void init(VkDevice dev); void destroy(); diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index 4ab6a0fa3d..760e069fb6 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -404,7 +404,7 @@ namespace vk void shader_interpreter::update_fragment_textures(const std::array& sampled_images) { // FIXME: Cannot use m_fragment_textures.start now since each interpreter has its own binding layout - auto [set, binding] = m_current_interpreter->get_uniform_location(::glsl::glsl_fragment_program, glsl::input_type_texture, "texture1D_array"); + auto [set, binding] = m_current_interpreter->get_uniform_location(::glsl::glsl_fragment_program, glsl::input_type_texture, "sampler1D_array[16]"); if (binding == umax) { return; From 37c4406b23a1cb9a45a787a7a930686feb26b36e Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 23 Jun 2025 00:57:30 +0300 Subject: 
[PATCH 25/30] Remove unused file --- rpcs3/Emu/RSX/VK/VKProgramHelper.hpp | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 rpcs3/Emu/RSX/VK/VKProgramHelper.hpp diff --git a/rpcs3/Emu/RSX/VK/VKProgramHelper.hpp b/rpcs3/Emu/RSX/VK/VKProgramHelper.hpp deleted file mode 100644 index 328df80f1d..0000000000 --- a/rpcs3/Emu/RSX/VK/VKProgramHelper.hpp +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include "VKProgramPipeline.h" - -namespace vk -{ - namespace glsl - { - - } -} - From 8130babad3ebe23d1f7899348b4c2af8372b67e7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 23 Jun 2025 01:36:18 +0300 Subject: [PATCH 26/30] vk: Fix crash when running MSAA resolve shaders --- rpcs3/Emu/RSX/VK/VKResolveHelper.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h index a9064eff95..05ea997096 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.h +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -16,7 +16,9 @@ namespace vk u32 cs_wave_y = 1; cs_resolve_base() - {} + { + ssbo_count = 0; + } virtual ~cs_resolve_base() {} From 3f635033cce8ee4990d7c3321fe8a6509c9e1922 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Mon, 23 Jun 2025 01:43:31 +0300 Subject: [PATCH 27/30] vk: Cleanup compiler warnings --- rpcs3/Emu/RSX/VK/VKCompute.cpp | 2 +- rpcs3/Emu/RSX/VK/VKCompute.h | 2 +- rpcs3/Emu/RSX/VK/VKDraw.cpp | 3 --- rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp | 3 --- rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp | 2 +- rpcs3/Emu/RSX/VK/VKProgramPipeline.h | 4 ++-- rpcs3/Emu/RSX/VK/VKResolveHelper.h | 2 +- rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 4 ++-- rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp | 2 +- 9 files changed, 9 insertions(+), 15 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp index 9d91773988..3df2c9695a 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.cpp +++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp @@ -353,7 +353,7 @@ namespace vk m_src = fmt::replace_all(m_src, 
syntax_replace); } - void cs_aggregator::bind_resources(const vk::command_buffer& cmd) + void cs_aggregator::bind_resources(const vk::command_buffer& /*cmd*/) { m_program->bind_uniform({ src->value, 0, block_length }, 0, 0); m_program->bind_uniform({ dst->value, 0, 4 }, 0, 1); diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 0dfa80f4f5..5d20a60391 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -35,7 +35,7 @@ namespace vk void destroy(); virtual std::vector get_inputs(); - virtual void bind_resources(const vk::command_buffer& cmd) {} + virtual void bind_resources(const vk::command_buffer& /*cmd*/) {} void load_program(const vk::command_buffer& cmd); diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index ae5348c52f..2a6d195bef 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -761,9 +761,6 @@ void VKGSRender::emit_geometry(u32 sub_index) return; } - const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; - const auto old_volatile_buffer = m_volatile_attribute_storage ? 
m_volatile_attribute_storage->value : null_buffer_view->value; - // Programs data is dependent on vertex state auto upload_info = upload_vertex_data(); if (!upload_info.vertex_draw_count) diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 94012840ec..040d528218 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -62,7 +62,6 @@ void VKFragmentDecompilerThread::prepareBindingTable() if (has_textures) [[ likely ]] { - unsigned num_textures = 0; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { if (!PT.type.starts_with("sampler")) @@ -72,8 +71,6 @@ void VKFragmentDecompilerThread::prepareBindingTable() for (const ParamItem& PI : PT.items) { - num_textures++; - const auto texture_id = vk::get_texture_index(PI.name); const auto mask = 1u << texture_id; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index bd12828e08..aae8407103 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -337,7 +337,7 @@ namespace vk m_sets[set_id].notify_descriptor_slot_updated(binding_point, buffer_view); } - void program::bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 set_id, u32 binding_point) + void program::bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, int count, u32 set_id, u32 binding_point) { // Non-caching write auto& set = m_sets[set_id]; diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index 4b38d23fb8..c7faaa8c37 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -170,7 +170,7 @@ namespace vk class program { - VkDevice m_device = VK_NULL_HANDLE; + VkDevice m_device = VK_NULL_HANDLE; VkPipeline m_pipeline = VK_NULL_HANDLE; VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; @@ -202,7 +202,7 @@ namespace vk void 
bind_uniform(const VkBufferView &buffer_view, u32 set_id, u32 binding_point); void bind_uniform(const VkBufferView &buffer_view, ::glsl::program_domain domain, program_input_type type, const std::string &binding_name); - void bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, VkDescriptorType type, int count, u32 set_id, u32 binding_point); + void bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, int count, u32 set_id, u32 binding_point); inline VkPipelineLayout layout() const { return m_pipeline_layout; } inline VkPipeline value() const { return m_pipeline; } diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h index 05ea997096..23d243b032 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.h +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -51,7 +51,7 @@ namespace vk return result; } - void bind_resources(const vk::command_buffer& cmd) override + void bind_resources(const vk::command_buffer& /*cmd*/) override { auto msaa_view = multisampled->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_VIEW_MULTISAMPLED)); auto resolved_view = resolve->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_IDENTITY)); diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index 760e069fb6..d389423dd2 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -46,7 +46,7 @@ namespace vk return location; } - u32 shader_interpreter::init(VKFragmentProgram* vk_prog, u64 compiler_opt) const + u32 shader_interpreter::init(VKFragmentProgram* vk_prog, u64 /*compiler_opt*/) const { std::memset(&vk_prog->binding_table, 0xff, sizeof(vk_prog->binding_table)); @@ -413,7 +413,7 @@ namespace vk const VkDescriptorImageInfo* texture_ptr = sampled_images.data(); for (u32 i = 0; i < 4; ++i, ++binding, texture_ptr += 16) { - m_current_interpreter->bind_uniform_array(texture_ptr, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 16, set, binding); + 
m_current_interpreter->bind_uniform_array(texture_ptr, 16, set, binding); } } diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp index 23a4733c60..dc6562289e 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp @@ -97,7 +97,7 @@ namespace vk return result; } - void fsr_pass::bind_resources(const vk::command_buffer& cmd) + void fsr_pass::bind_resources(const vk::command_buffer& /*cmd*/) { // Bind relevant stuff if (!m_sampler) From 1f0328c5d5fc4ba60e552734a3e545ed5c0643d7 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 24 Jun 2025 03:59:29 +0300 Subject: [PATCH 28/30] vk: Fix up binding layouts for some broken shaders --- rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl | 4 ++-- rpcs3/Emu/RSX/VK/VKOverlays.cpp | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl index de1992174f..84fdfdb8b7 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl @@ -14,8 +14,8 @@ R"( #define SAMPLER_MODE_TEXTURE2D 3 #ifdef VULKAN - layout(set=0, binding=1) uniform sampler2D fs0; - layout(set=0, binding=2) uniform sampler2DArray fs1; + layout(set=0, binding=0) uniform sampler2D fs0; + layout(set=0, binding=1) uniform sampler2DArray fs1; #else layout(binding=31) uniform sampler2D fs0; layout(binding=30) uniform sampler2DArray fs1; diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index 743339e26c..70d9a7ab49 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ -310,6 +310,7 @@ namespace vk // 2 input textures m_num_usable_samplers = 2; + m_num_uniform_buffers = 0; renderpass_config.set_attachment_count(1); renderpass_config.set_color_mask(0, true, true, true, true); @@ -807,6 +808,9 @@ namespace vk "{\n" 
" out_color = vec4(0.);\n" "}\n"; + + m_num_uniform_buffers = 0; + m_num_usable_samplers = 0; } void stencil_clear_pass::set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h) @@ -865,6 +869,7 @@ namespace vk renderpass_config.set_attachment_count(1); m_num_usable_samplers = 2; + m_num_uniform_buffers = 0; } std::vector video_out_calibration_pass::get_fragment_inputs() From 3df93dcc19cef542d3080d07b61873365f7993ac Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 24 Jun 2025 04:22:36 +0300 Subject: [PATCH 29/30] vk: Fix shader interpreter inputs when textures are not used --- rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index d389423dd2..c80fb7873d 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -275,12 +275,15 @@ namespace vk in.name = "FragmentInstructionBlock"; inputs.push_back(in); - for (int i = 0, location = m_fragment_textures_start; i < 4; ++i, ++location) + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES) { - in.location = location; - in.name = std::string(type_names[i]) + "_array[16]"; - in.type = glsl::input_type_texture; - inputs.push_back(in); + for (int i = 0, location = m_fragment_textures_start; i < 4; ++i, ++location) + { + in.location = location; + in.name = std::string(type_names[i]) + "_array[16]"; + in.type = glsl::input_type_texture; + inputs.push_back(in); + } } vk_prog->SetInputs(inputs); From aa50b0fbb99c04918645a78acd1fcab8a1cf31b2 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 25 Jun 2025 01:44:04 +0300 Subject: [PATCH 30/30] vk: Fix video-out calibration pass inputs --- rpcs3/Emu/RSX/VK/VKOverlays.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index 70d9a7ab49..34ca64ca66 100644 --- 
a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ -859,7 +859,7 @@ namespace vk std::pair repl_list[] = { - { "%sampler_binding", fmt::format("(%d + x)", sampler_location(0)) }, + { "%sampler_binding", "x" }, { "%set_decorator", "set=0" }, }; fs_src = fmt::replace_all(fs_src, repl_list);