diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index dfec324eeb..033994547d 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -285,6 +285,13 @@ namespace rsx return pos; } + void operator += (const rsx::simple_array& that) + { + const auto old_size = _size; + resize(_size + that._size); + std::memcpy(data() + old_size, that.data(), that.size_bytes()); + } + void clear() { _size = 0; @@ -305,6 +312,11 @@ namespace rsx return _size * sizeof(Ty); } + u32 size_bytes32() const + { + return _size * sizeof(Ty); + } + u32 capacity() const { return _capacity; diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl index de1992174f..84fdfdb8b7 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl @@ -14,8 +14,8 @@ R"( #define SAMPLER_MODE_TEXTURE2D 3 #ifdef VULKAN - layout(set=0, binding=1) uniform sampler2D fs0; - layout(set=0, binding=2) uniform sampler2DArray fs1; + layout(set=0, binding=0) uniform sampler2D fs0; + layout(set=0, binding=1) uniform sampler2DArray fs1; #else layout(binding=31) uniform sampler2D fs0; layout(binding=30) uniform sampler2DArray fs1; diff --git a/rpcs3/Emu/RSX/Program/ShaderInterpreter.h b/rpcs3/Emu/RSX/Program/ShaderInterpreter.h index 5503a2870c..f89c058dec 100644 --- a/rpcs3/Emu/RSX/Program/ShaderInterpreter.h +++ b/rpcs3/Emu/RSX/Program/ShaderInterpreter.h @@ -20,8 +20,9 @@ namespace program_common COMPILER_OPT_ENABLE_KIL = (1 << 11), COMPILER_OPT_ENABLE_STIPPLING = (1 << 12), COMPILER_OPT_ENABLE_INSTANCING = (1 << 13), + COMPILER_OPT_ENABLE_VTX_TEXTURES = (1 << 14), - COMPILER_OPT_MAX = COMPILER_OPT_ENABLE_INSTANCING + COMPILER_OPT_MAX = COMPILER_OPT_ENABLE_VTX_TEXTURES }; static std::string get_vertex_interpreter() diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp 
b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 01e5cc07aa..815492caa3 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -35,4 +35,31 @@ namespace vk fmt::throw_exception("Unknown register name: %s", varying_register_name); } + + int get_texture_index(std::string_view name) + { + if (name.length() < 2) + { + fmt::throw_exception("Invalid texture name: '%s'", name); + } + + constexpr int max_index_length = 2; + const int name_length = static_cast<int>(name.length()); + std::string index; + + for (int char_idx = name_length - max_index_length; char_idx < name_length; ++char_idx) + { + if (std::isdigit(name[char_idx])) + { + index += name[char_idx]; + } + } + + if (index.empty()) + { + fmt::throw_exception("Invalid texture name: '%s'", name); + } + + return std::atoi(index.c_str()); + } } diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h index b0920e27f5..b17eb83b11 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h @@ -6,4 +6,6 @@ namespace vk using namespace ::glsl; int get_varying_register_location(std::string_view varying_register_name); + + int get_texture_index(std::string_view name); } diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp index 602d855d76..76cda4d253 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp @@ -8,157 +8,6 @@ namespace vk { - rsx::simple_array<VkDescriptorSetLayoutBinding> get_common_binding_table() - { - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); - rsx::simple_array<VkDescriptorSetLayoutBinding> bindings(binding_table.instancing_constants_buffer_slot + 1); - - u32 idx = 0; - - // Vertex stream, one stream for cacheable data, one stream for transient data - for (int i = 0; i < 3; i++) - { - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - bindings[idx].descriptorCount = 1; - 
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.vertex_buffers_first_bind_slot + i; - bindings[idx].pImmutableSamplers = nullptr; - idx++; - } - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.fragment_constant_buffers_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.fragment_state_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.fragment_texture_params_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; - bindings[idx].binding = binding_table.vertex_params_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.conditional_render_predicate_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.rasterizer_env_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.instancing_lookup_table_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.instancing_constants_buffer_slot; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - return bindings; - } - - std::tuple> - get_common_pipeline_layout(VkDevice dev) - { - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); - auto bindings = get_common_binding_table(); - u32 idx = ::size32(bindings); - - bindings.resize(binding_table.total_descriptor_bindings); - - for (auto binding = binding_table.textures_first_bind_slot; - binding < binding_table.vertex_textures_first_bind_slot; - binding++) - { - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding; - bindings[idx].pImmutableSamplers = nullptr; - idx++; - } - - for (int i = 0; i < rsx::limits::vertex_textures_count; i++) - { - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.vertex_textures_first_bind_slot + i; - bindings[idx].pImmutableSamplers = nullptr; - idx++; - } - - ensure(idx == 
binding_table.total_descriptor_bindings); - - std::array push_constants; - push_constants[0].offset = 0; - push_constants[0].size = 20; - push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - - if (vk::emulate_conditional_rendering()) - { - // Conditional render toggle - push_constants[0].size = 24; - } - - const auto set_layout = vk::descriptors::create_layout(bindings); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &set_layout; - layout_info.pushConstantRangeCount = 1; - layout_info.pPushConstantRanges = push_constants.data(); - - VkPipelineLayout result; - CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); - return std::make_tuple(result, set_layout, bindings); - } - rsx::simple_array get_descriptor_pool_sizes(const rsx::simple_array& bindings) { // Compile descriptor pool sizes diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h index 371d0ebf76..e5ada45bf8 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h @@ -5,13 +5,6 @@ namespace vk { - // Grab standard layout for decompiled RSX programs. Also used by the interpreter. - // FIXME: This generates a bloated monstrosity that needs to die. - std::tuple> get_common_pipeline_layout(VkDevice dev); - - // Returns the standard binding layout without texture slots. Those have special handling depending on the consumer. 
- rsx::simple_array get_common_binding_table(); - // Returns an array of pool sizes that can be used to generate a proper descriptor pool rsx::simple_array get_descriptor_pool_sizes(const rsx::simple_array& bindings); } diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp index 637642d8bf..3df2c9695a 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.cpp +++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp @@ -8,64 +8,43 @@ namespace vk { - std::vector> compute_task::get_descriptor_layout() + std::vector compute_task::get_inputs() { - std::vector> result; - result.emplace_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ssbo_count); + std::vector result; + for (unsigned i = 0; i < ssbo_count; ++i) + { + const auto input = glsl::program_input::make + ( + ::glsl::glsl_compute_program, + "ssbo" + std::to_string(i), + glsl::program_input_type::input_type_storage_buffer, + 0, + i + ); + result.push_back(input); + } + + if (use_push_constants && push_constants_size > 0) + { + const auto input = glsl::program_input::make + ( + ::glsl::glsl_compute_program, + "push_constants", + glsl::program_input_type::input_type_push_constant, + 0, + 0, + glsl::push_constant_ref{ .offset = 0, .size = push_constants_size } + ); + result.push_back(input); + } + return result; } - void compute_task::init_descriptors() - { - rsx::simple_array descriptor_pool_sizes; - rsx::simple_array bindings; - - const auto layout = get_descriptor_layout(); - for (const auto &e : layout) - { - descriptor_pool_sizes.push_back({e.first, e.second}); - - for (unsigned n = 0; n < e.second; ++n) - { - bindings.push_back - ({ - u32(bindings.size()), - e.first, - 1, - VK_SHADER_STAGE_COMPUTE_BIT, - nullptr - }); - } - } - - // Reserve descriptor pools - m_descriptor_pool.create(*g_render_device, descriptor_pool_sizes); - m_descriptor_layout = vk::descriptors::create_layout(bindings); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - 
layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &m_descriptor_layout; - - VkPushConstantRange push_constants{}; - if (use_push_constants) - { - push_constants.size = push_constants_size; - push_constants.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - - layout_info.pushConstantRangeCount = 1; - layout_info.pPushConstantRanges = &push_constants; - } - - CHECK_RESULT(vkCreatePipelineLayout(*g_render_device, &layout_info, nullptr, &m_pipeline_layout)); - } - void compute_task::create() { if (!initialized) { - init_descriptors(); - switch (vk::get_driver_vendor()) { case vk::driver_vendor::unknown: @@ -121,10 +100,6 @@ namespace vk m_program.reset(); m_param_buffer.reset(); - vkDestroyDescriptorSetLayout(*g_render_device, m_descriptor_layout, nullptr); - vkDestroyPipelineLayout(*g_render_device, m_pipeline_layout, nullptr); - m_descriptor_pool.destroy(); - initialized = false; } } @@ -142,26 +117,23 @@ namespace vk shader_stage.module = handle; shader_stage.pName = "main"; - VkComputePipelineCreateInfo info{}; - info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - info.stage = shader_stage; - info.layout = m_pipeline_layout; - info.basePipelineIndex = -1; - info.basePipelineHandle = VK_NULL_HANDLE; + VkComputePipelineCreateInfo create_info + { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = handle, + .pName = "main" + }, + }; auto compiler = vk::get_pipe_compiler(); - m_program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE); - declare_inputs(); + m_program = compiler->compile(create_info, vk::pipe_compiler::COMPILE_INLINE, {}, get_inputs()); } - ensure(m_used_descriptors < VK_MAX_COMPUTE_TASKS); - - m_descriptor_set = m_descriptor_pool.allocate(m_descriptor_layout, VK_TRUE); - - bind_resources(); - - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_program->pipeline); - 
m_descriptor_set.bind(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline_layout); + bind_resources(cmd); + m_program->bind(cmd, VK_PIPELINE_BIND_POINT_COMPUTE); } void compute_task::run(const vk::command_buffer& cmd, u32 invocations_x, u32 invocations_y, u32 invocations_z) @@ -271,15 +243,19 @@ namespace vk m_src += suffix; } - void cs_shuffle_base::bind_resources() + void cs_shuffle_base::bind_resources(const vk::command_buffer& cmd) { - m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + set_parameters(cmd); + m_program->bind_uniform({ m_data->value, m_data_offset, m_data_length }, 0, 0); } - void cs_shuffle_base::set_parameters(const vk::command_buffer& cmd, const u32* params, u8 count) + void cs_shuffle_base::set_parameters(const vk::command_buffer& cmd) { - ensure(use_push_constants); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, count * 4, params); + if (!m_params.empty()) + { + ensure(use_push_constants); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, m_params.size_bytes32(), m_params.data()); + } } void cs_shuffle_base::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_length, u32 data_offset) @@ -317,15 +293,15 @@ namespace vk " uint stencil_offset;\n"; } - void cs_interleave_task::bind_resources() + void cs_interleave_task::bind_resources(const vk::command_buffer& cmd) { - m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + set_parameters(cmd); + m_program->bind_uniform({ m_data->value, m_data_offset, m_ssbo_length }, 0, 0); } void cs_interleave_task::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset) { - u32 parameters[4] = { data_length, zeta_offset - data_offset, stencil_offset - data_offset, 0 }; - set_parameters(cmd, parameters, 4); + 
m_params = { data_length, zeta_offset - data_offset, stencil_offset - data_offset, 0 }; ensure(stencil_offset > data_offset); m_ssbo_length = stencil_offset + (data_length / 4) - data_offset; @@ -377,10 +353,10 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void cs_aggregator::bind_resources() + void cs_aggregator::bind_resources(const vk::command_buffer& /*cmd*/) { - m_program->bind_buffer({ src->value, 0, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); - m_program->bind_buffer({ dst->value, 0, 4 }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_uniform({ src->value, 0, block_length }, 0, 0); + m_program->bind_uniform({ dst->value, 0, 4 }, 0, 1); } void cs_aggregator::run(const vk::command_buffer& cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 4f9a3f2a3a..5d20a60391 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -19,12 +19,6 @@ namespace vk std::unique_ptr m_program; std::unique_ptr m_param_buffer; - vk::descriptor_pool m_descriptor_pool; - descriptor_set m_descriptor_set; - VkDescriptorSetLayout m_descriptor_layout = nullptr; - VkPipelineLayout m_pipeline_layout = nullptr; - u32 m_used_descriptors = 0; - bool initialized = false; bool unroll_loops = true; bool use_push_constants = false; @@ -37,15 +31,11 @@ namespace vk compute_task() = default; virtual ~compute_task() { destroy(); } - virtual std::vector> get_descriptor_layout(); - - void init_descriptors(); - void create(); void destroy(); - virtual void bind_resources() {} - virtual void declare_inputs() {} + virtual std::vector get_inputs(); + virtual void bind_resources(const vk::command_buffer& /*cmd*/) {} void load_program(const vk::command_buffer& cmd); @@ -60,6 +50,8 @@ namespace vk u32 m_data_length = 0; u32 kernel_size = 1; + rsx::simple_array m_params; + std::string variables, work_kernel, 
loop_advance, suffix; std::string method_declarations; @@ -67,9 +59,9 @@ namespace vk void build(const char* function_name, u32 _kernel_size = 0); - void bind_resources() override; + void bind_resources(const vk::command_buffer& cmd) override; - void set_parameters(const vk::command_buffer& cmd, const u32* params, u8 count); + void set_parameters(const vk::command_buffer& cmd); void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_length, u32 data_offset = 0); }; @@ -135,7 +127,7 @@ namespace vk cs_interleave_task(); - void bind_resources() override; + void bind_resources(const vk::command_buffer& cmd) override; void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset); }; @@ -352,9 +344,10 @@ namespace vk cs_shuffle_base::build(""); } - void bind_resources() override + void bind_resources(const vk::command_buffer& cmd) override { - m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + set_parameters(cmd); + m_program->bind_uniform({ m_data->value, m_data_offset, m_ssbo_length }, 0, 0); } void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 src_offset, u32 src_length, u32 dst_offset) @@ -371,8 +364,7 @@ namespace vk data_offset = src_offset; } - u32 parameters[4] = { src_length, src_offset - data_offset, dst_offset - data_offset, 0 }; - set_parameters(cmd, parameters, 4); + m_params = { src_length, src_offset - data_offset, dst_offset - data_offset, 0 }; cs_shuffle_base::run(cmd, data, src_length, data_offset); } }; @@ -453,15 +445,17 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void bind_resources() override + void bind_resources(const vk::command_buffer& cmd) override { - m_program->bind_buffer({ src_buffer->value, in_offset, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); - m_program->bind_buffer({ dst_buffer->value, out_offset, 
block_length }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + set_parameters(cmd); + + m_program->bind_uniform({ src_buffer->value, in_offset, block_length }, 0, 0); + m_program->bind_uniform({ dst_buffer->value, out_offset, block_length }, 0, 1); } void set_parameters(const vk::command_buffer& cmd) { - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, params.data); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, params.data); } void run(const vk::command_buffer& cmd, const vk::buffer* dst, u32 out_offset, const vk::buffer* src, u32 in_offset, u32 data_length, u32 width, u32 height, u32 depth, u32 mipmaps) override @@ -480,7 +474,6 @@ namespace vk params.logw = rsx::ceil_log2(width); params.logh = rsx::ceil_log2(height); params.logd = rsx::ceil_log2(depth); - set_parameters(cmd); const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); @@ -497,7 +490,7 @@ namespace vk cs_aggregator(); - void bind_resources() override; + void bind_resources(const vk::command_buffer& cmd) override; void run(const vk::command_buffer& cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words); }; @@ -581,16 +574,18 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void bind_resources() override + void bind_resources(const vk::command_buffer& cmd) override { - const auto op = static_cast(Op); - m_program->bind_buffer({ src_buffer->value, in_offset, in_block_length }, 0 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); - m_program->bind_buffer({ dst_buffer->value, out_offset, out_block_length }, 1 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + set_parameters(cmd); + + const auto op = static_cast(Op); + m_program->bind_uniform({ src_buffer->value, in_offset, in_block_length }, 0u, 0u ^ op); + m_program->bind_uniform({ 
dst_buffer->value, out_offset, out_block_length }, 0u, 1u ^ op); } void set_parameters(const vk::command_buffer& cmd) { - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, &params); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, &params); } void run(const vk::command_buffer& cmd, const RSX_detiler_config& config) @@ -653,7 +648,6 @@ namespace vk params.image_height = (Op == RSX_detiler_op::decode) ? tile_aligned_height : config.image_height; params.image_pitch = config.image_pitch; params.image_bpp = config.image_bpp; - set_parameters(cmd); const u32 subtexels_per_invocation = (config.image_bpp < 4) ? (4 / config.image_bpp) : 1; const u32 virtual_width = config.image_width / subtexels_per_invocation; diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 3b760f103f..2a6d195bef 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -554,9 +554,8 @@ bool VKGSRender::bind_texture_env() if (view) [[likely]] { m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); + vk::glsl::binding_set_index_fragment, + m_fs_binding_table->ftex_location[i]); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { @@ -576,27 +575,22 @@ bool VKGSRender::bind_texture_env() } m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, - true); + vk::glsl::binding_set_index_fragment, + m_fs_binding_table->ftex_stencil_location[i]); } } else { const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); + vk::glsl::binding_set_index_fragment, + m_fs_binding_table->ftex_location[i]); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, - true); + vk::glsl::binding_set_index_fragment, + m_fs_binding_table->ftex_stencil_location[i]); } } } @@ -610,9 +604,8 @@ bool VKGSRender::bind_texture_env() { const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + vk::glsl::binding_set_index_vertex, + m_vs_binding_table->vtex_location[i]); continue; } @@ -634,9 +627,8 @@ bool VKGSRender::bind_texture_env() const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + vk::glsl::binding_set_index_vertex, + m_vs_binding_table->vtex_location[i]); continue; } @@ -644,9 +636,8 @@ bool VKGSRender::bind_texture_env() validate_image_layout_for_read_access(*m_current_command_buffer, image_ptr, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, sampler_state); m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + 
vk::glsl::binding_set_index_vertex, + m_vs_binding_table->vtex_location[i]); } return out_of_memory; @@ -721,7 +712,7 @@ bool VKGSRender::bind_interpreter_texture_env() } } - m_shader_interpreter.update_fragment_textures(texture_env, m_current_frame->descriptor_set); + m_shader_interpreter.update_fragment_textures(texture_env); return out_of_memory; } @@ -770,9 +761,6 @@ void VKGSRender::emit_geometry(u32 sub_index) return; } - const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; - const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; - // Programs data is dependent on vertex state auto upload_info = upload_vertex_data(); if (!upload_info.vertex_draw_count) @@ -827,8 +815,6 @@ void VKGSRender::emit_geometry(u32 sub_index) auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; bool update_descriptors = false; - const auto& binding_table = m_device->get_pipeline_binding_table(); - if (m_current_draw.subdraw_id == 0) { update_descriptors = true; @@ -848,33 +834,6 @@ void VKGSRender::emit_geometry(u32 sub_index) vk::clear_status_interrupt(vk::heap_changed); } } - else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) - { - // Need to update descriptors; make a copy for the next draw - VkDescriptorSet previous_set = m_current_frame->descriptor_set.value(); - m_current_frame->descriptor_set.flush(); - m_current_frame->descriptor_set = allocate_descriptor_set(); - rsx::simple_array copy_cmds(binding_table.total_descriptor_bindings); - - for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n) - { - copy_cmds[n] = - { - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, // sType - nullptr, // pNext - previous_set, // srcSet - n, // srcBinding - 0u, // srcArrayElement - m_current_frame->descriptor_set.value(), // dstSet - n, 
// dstBinding - 0u, // dstArrayElement - 1u // descriptorCount - }; - } - - m_current_frame->descriptor_set.push(copy_cmds); - update_descriptors = true; - } // Update vertex fetch parameters update_vertex_env(sub_index, upload_info); @@ -882,9 +841,9 @@ void VKGSRender::emit_geometry(u32 sub_index) ensure(m_vertex_layout_storage); if (update_descriptors) { - m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set); - m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); + m_program->bind_uniform(persistent_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location); + m_program->bind_uniform(volatile_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 1); + m_program->bind_uniform(m_vertex_layout_storage->value, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 2); } bool reload_state = (!m_current_draw.subdraw_id++); @@ -908,10 +867,12 @@ void VKGSRender::emit_geometry(u32 sub_index) reload_state = true; }); + // Bind both pipe and descriptors in one go + // FIXME: We only need to rebind the pipeline when reload state is set. Flags? 
+ m_program->bind(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS); + if (reload_state) { - vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); - update_draw_state(); begin_render_pass(); @@ -929,7 +890,6 @@ void VKGSRender::emit_geometry(u32 sub_index) } // Bind the new set of descriptors for use with this draw call - m_current_frame->descriptor_set.bind(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline_layout); m_frame_stats.setup_time += m_profiler.duration(); if (!upload_info.index_info) @@ -1083,9 +1043,6 @@ void VKGSRender::end() return; } - // Allocate descriptor set - m_current_frame->descriptor_set = allocate_descriptor_set(); - // Load program execution environment load_program_env(); m_frame_stats.setup_time += m_profiler.duration(); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index dd654a6736..040d528218 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -26,8 +26,85 @@ std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::st return glsl::compareFunctionImpl(f, Op0, Op1); } +void VKFragmentDecompilerThread::prepareBindingTable() +{ + // First check if we have constants and textures as those need extra work + bool has_constants = false, has_textures = false; + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (has_constants && has_textures) + { + break; + } + + if (PT.type.starts_with("sampler")) + { + has_textures = true; + continue; + } + + ensure(PT.type.starts_with("vec")); + has_constants = true; + } + + unsigned location = 0; // All bindings must be set from this var + vk_prog->binding_table.context_buffer_location = location++; + if (has_constants) + { + vk_prog->binding_table.cbuf_location = location++; + } + + vk_prog->binding_table.tex_param_location = location++; + 
vk_prog->binding_table.polygon_stipple_params_location = location++; + + std::memset(vk_prog->binding_table.ftex_location, 0xff, sizeof(vk_prog->binding_table.ftex_location)); + std::memset(vk_prog->binding_table.ftex_stencil_location, 0xff, sizeof(vk_prog->binding_table.ftex_stencil_location)); + + if (has_textures) [[ likely ]] + { + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + if (!PT.type.starts_with("sampler")) + { + continue; + } + + for (const ParamItem& PI : PT.items) + { + const auto texture_id = vk::get_texture_index(PI.name); + const auto mask = 1u << texture_id; + + // Allocate real binding + vk_prog->binding_table.ftex_location[texture_id] = location++; + + // Tag the stencil mirror if required + if (properties.redirected_sampler_mask & mask) [[ unlikely ]] + { + vk_prog->binding_table.ftex_stencil_location[texture_id] = 0; + } + } + + // Normalize stencil offsets + if (properties.redirected_sampler_mask != 0) [[ unlikely ]] + { + for (auto& stencil_location : vk_prog->binding_table.ftex_stencil_location) + { + if (stencil_location != 0) + { + continue; + } + + stencil_location = location++; + } + } + } + } +} + void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) { + prepareBindingTable(); + std::vector required_extensions; if (device_props.has_native_half_support) @@ -97,21 +174,18 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) { - u32 location = m_binding_table.textures_first_bind_slot; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (PT.type != "sampler1D" && - PT.type != "sampler2D" && - PT.type != "sampler3D" && - PT.type != "samplerCube") + if (!PT.type.starts_with("sampler")) + { continue; + } for (const ParamItem& PI : PT.items) { std::string samplerType = PT.type; - ensure(PI.name.length() > 3); - int index = atoi(&PI.name[3]); + const int index = vk::get_texture_index(PI.name); const 
auto mask = (1 << index); if (properties.multisampled_sampler_mask & mask) @@ -135,39 +209,37 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) } } - vk::glsl::program_input in; - in.location = location; - in.domain = glsl::glsl_fragment_program; - in.name = PI.name; - in.type = vk::glsl::input_type_texture; - + const int id = vk::get_texture_index(PI.name); + auto in = vk::glsl::program_input::make( + glsl::glsl_fragment_program, + PI.name, + vk::glsl::input_type_texture, + vk::glsl::binding_set_index_fragment, + vk_prog->binding_table.ftex_location[id] + ); inputs.push_back(in); - OS << "layout(set=0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set=1, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; if (properties.redirected_sampler_mask & mask) { // Insert stencil mirror declaration in.name += "_stencil"; - in.location = location; - + in.location = vk_prog->binding_table.ftex_stencil_location[id]; inputs.push_back(in); - OS << "layout(set=0, binding=" << location++ << ") uniform u" << samplerType << " " << in.name << ";\n"; + OS << "layout(set=1, binding=" << in.location << ") uniform u" << samplerType << " " << in.name << ";\n"; } } } - ensure(location <= m_binding_table.vertex_textures_first_bind_slot); // "Too many sampler descriptors!" 
- std::string constants_block; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (PT.type == "sampler1D" || - PT.type == "sampler2D" || - PT.type == "sampler3D" || - PT.type == "samplerCube") + if (PT.type.starts_with("sampler")) + { continue; + } for (const ParamItem& PI : PT.items) { @@ -177,13 +249,13 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) if (!constants_block.empty()) { - OS << "layout(std140, set = 0, binding = 2) uniform FragmentConstantsBuffer\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n"; OS << "{\n"; OS << constants_block; OS << "};\n\n"; } - OS << "layout(std140, set = 0, binding = 3) uniform FragmentStateBuffer\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n"; OS << "{\n"; OS << " float fog_param0;\n"; OS << " float fog_param1;\n"; @@ -195,32 +267,39 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << " float wpos_bias;\n"; OS << "};\n\n"; - OS << "layout(std140, set = 0, binding = 4) uniform TextureParametersBuffer\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.tex_param_location << ") uniform TextureParametersBuffer\n"; OS << "{\n"; OS << " sampler_info texture_parameters[16];\n"; OS << "};\n\n"; - OS << "layout(std140, set = 0, binding = " << std::to_string(m_binding_table.rasterizer_env_bind_slot) << ") uniform RasterizerHeap\n"; + OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.polygon_stipple_params_location << ") uniform RasterizerHeap\n"; OS << "{\n"; OS << " uvec4 stipple_pattern[8];\n"; OS << "};\n\n"; - vk::glsl::program_input in; - in.location = m_binding_table.fragment_constant_buffers_bind_slot; - in.domain = glsl::glsl_fragment_program; - in.name = "FragmentConstantsBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - inputs.push_back(in); + 
vk::glsl::program_input in + { + .domain = glsl::glsl_fragment_program, + .type = vk::glsl::input_type_uniform_buffer, + .set = vk::glsl::binding_set_index_fragment + }; - in.location = m_binding_table.fragment_state_bind_slot; + if (!constants_block.empty()) + { + in.location = vk_prog->binding_table.cbuf_location; + in.name = "FragmentConstantsBuffer"; + inputs.push_back(in); + } + + in.location = vk_prog->binding_table.context_buffer_location; in.name = "FragmentStateBuffer"; inputs.push_back(in); - in.location = m_binding_table.fragment_texture_params_bind_slot; + in.location = vk_prog->binding_table.tex_param_location; in.name = "TextureParametersBuffer"; inputs.push_back(in); - in.location = m_binding_table.rasterizer_env_bind_slot; + in.location = vk_prog->binding_table.polygon_stipple_params_location; in.name = "RasterizerHeap"; inputs.push_back(in); } @@ -372,7 +451,6 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) void VKFragmentDecompilerThread::Task() { - m_binding_table = vk::g_render_device->get_pipeline_binding_table(); m_shader = Decompile(); vk_prog->SetInputs(inputs); } @@ -406,10 +484,7 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog) { for (const ParamItem& PI : PT.items) { - if (PT.type == "sampler1D" || - PT.type == "sampler2D" || - PT.type == "sampler3D" || - PT.type == "samplerCube") + if (PT.type.starts_with("sampler")) continue; usz offset = atoi(PI.name.c_str() + 2); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index 787f38ec05..c51b81b8fc 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -10,7 +10,7 @@ namespace vk class shader_interpreter; } -struct VKFragmentDecompilerThread : public FragmentProgramDecompiler +class VKFragmentDecompilerThread : public FragmentProgramDecompiler { friend class vk::shader_interpreter; @@ -19,7 +19,8 @@ struct VKFragmentDecompilerThread : public FragmentProgramDecompiler 
std::vector inputs; class VKFragmentProgram *vk_prog; glsl::shader_properties m_shader_props{}; - vk::pipeline_binding_table m_binding_table{}; + + void prepareBindingTable(); public: VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst) @@ -32,6 +33,7 @@ public: void Task(); const std::vector& get_inputs() { return inputs; } + protected: std::string getFloatTypeName(usz elementCount) override; std::string getHalfTypeName(usz elementCount) override; @@ -63,8 +65,19 @@ public: std::vector FragmentConstantOffsetCache; std::array output_color_masks{ {} }; - std::vector uniforms; + + struct + { + u32 context_buffer_location = umax; // Rasterizer context + u32 cbuf_location = umax; // Constants register file + u32 tex_param_location = umax; // Texture configuration data + u32 polygon_stipple_params_location = umax; // Polygon stipple settings + u32 ftex_location[16]; // Texture locations array + u32 ftex_stencil_location[16]; // Texture stencil mirror array + + } binding_table; + void SetInputs(std::vector& inputs); /** * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. 
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 107cd7b399..b2003f645c 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1,3 +1,4 @@ +#include "Emu/RSX/VK/vkutils/descriptors.h" #include "stdafx.h" #include "../Overlays/overlay_compile_notification.h" #include "../Overlays/Shaders/shader_loading_dialog_native.h" @@ -423,8 +424,8 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) std::vector& gpus = m_instance.enumerate_devices(); - //Actually confirm that the loader found at least one compatible device - //This should not happen unless something is wrong with the driver setup on the target system + // Actually confirm that the loader found at least one compatible device + // This should not happen unless something is wrong with the driver setup on the target system if (gpus.empty()) { //We can't throw in Emulator::Load, so we show error and return @@ -482,20 +483,16 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) swapchain_unavailable = true; } - //create command buffer... + // create command buffer... 
m_command_buffer_pool.create((*m_device), m_device->get_graphics_queue_family()); m_primary_cb_list.create(m_command_buffer_pool, vk::command_buffer::access_type_hint::flush_only); m_current_command_buffer = m_primary_cb_list.get(); m_current_command_buffer->begin(); - //Create secondary command_buffer for parallel operations + // Create secondary command_buffer for parallel operations m_secondary_command_buffer_pool.create((*m_device), m_device->get_graphics_queue_family()); m_secondary_cb_list.create(m_secondary_command_buffer_pool, vk::command_buffer::access_type_hint::all); - //Precalculated stuff - rsx::simple_array binding_layout; - std::tie(m_pipeline_layout, m_descriptor_layouts, binding_layout) = vk::get_common_pipeline_layout(*m_device); - //Occlusion m_occlusion_query_manager = std::make_unique(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE); m_occlusion_map.resize(rsx::reports::occlusion_query_count); @@ -508,11 +505,6 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) m_occlusion_query_manager->set_control_flags(VK_QUERY_CONTROL_PRECISE_BIT, 0); } - // Generate frame contexts - const u32 max_draw_calls = m_device->get_descriptor_max_draw_calls(); - const auto descriptor_type_sizes = vk::get_descriptor_pool_sizes(binding_layout); - m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls); - VkSemaphoreCreateInfo semaphore_info = {}; semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; @@ -852,12 +844,6 @@ VKGSRender::~VKGSRender() m_stencil_mirror_sampler.reset(); - // Pipeline descriptors - m_descriptor_pool.destroy(); - - vkDestroyPipelineLayout(*m_device, m_pipeline_layout, nullptr); - vkDestroyDescriptorSetLayout(*m_device, m_descriptor_layouts, nullptr); - // Queries m_occlusion_query_manager.reset(); m_cond_render_buffer.reset(); @@ -869,6 +855,9 @@ VKGSRender::~VKGSRender() m_command_buffer_pool.destroy(); m_secondary_command_buffer_pool.destroy(); + // Descriptors + 
vk::descriptors::flush(); + // Global resources vk::destroy_global_resources(); @@ -1157,18 +1146,6 @@ void VKGSRender::check_present_status() } } -VkDescriptorSet VKGSRender::allocate_descriptor_set() -{ - if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]] - { - return m_descriptor_pool.allocate(m_descriptor_layouts, VK_TRUE); - } - else - { - return m_shader_interpreter.allocate_descriptor_set(); - } -} - void VKGSRender::set_viewport() { const auto [clip_width, clip_height] = rsx::apply_resolution_scale( @@ -1242,7 +1219,7 @@ void VKGSRender::on_init_thread() if (!m_overlay_manager) { m_frame->hide(); - m_shaders_cache->load(nullptr, m_pipeline_layout); + m_shaders_cache->load(nullptr); m_frame->show(); } else @@ -1250,7 +1227,7 @@ void VKGSRender::on_init_thread() rsx::shader_loading_dialog_native dlg(this); // TODO: Handle window resize messages during loading on GPUs without OUT_OF_DATE_KHR support - m_shaders_cache->load(&dlg, m_pipeline_layout); + m_shaders_cache->load(&dlg); } } @@ -1803,8 +1780,11 @@ bool VKGSRender::load_program() m_program = m_shader_interpreter.get( m_pipeline_properties, current_fp_metadata, + current_vp_metadata, current_vertex_program.ctrl, current_fragment_program.ctrl); + + std::tie(m_vs_binding_table, m_fs_binding_table) = get_binding_table(); return true; } } @@ -1870,7 +1850,7 @@ bool VKGSRender::load_program() vertex_program, fragment_program, m_pipeline_properties, - shadermode != shader_mode::recompiler, true, m_pipeline_layout); + shadermode != shader_mode::recompiler, true); vk::leave_uninterruptible(); @@ -1902,6 +1882,7 @@ bool VKGSRender::load_program() m_program = m_shader_interpreter.get( m_pipeline_properties, current_fp_metadata, + current_vp_metadata, current_vertex_program.ctrl, current_fragment_program.ctrl); @@ -1923,6 +1904,16 @@ bool VKGSRender::load_program() } } + if (m_program) + { + std::tie(m_vs_binding_table, m_fs_binding_table) = get_binding_table(); + } + else + { + m_vs_binding_table = 
nullptr; + m_fs_binding_table = nullptr; + } + return m_program != nullptr; } @@ -1934,13 +1925,14 @@ void VKGSRender::load_program_env() } const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length; + const bool is_interpreter = m_shader_interpreter.is_interpreter(m_program); const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty); const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty); const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty); const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty); const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty); - const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program)); + const bool update_instruction_buffers = (!!m_interpreter_state && is_interpreter); const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty)); const bool update_instancing_data = rsx::method_registers.current_draw_clause.is_trivial_instanced_draw; @@ -2101,34 +2093,36 @@ void VKGSRender::load_program_env() } } - const auto& binding_table = m_device->get_pipeline_binding_table(); + m_program->bind_uniform(m_vertex_env_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->context_buffer_location); + m_program->bind_uniform(m_fragment_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->context_buffer_location); + m_program->bind_uniform(m_fragment_texture_params_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->tex_param_location); + m_program->bind_uniform(m_raster_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->polygon_stipple_params_location); - 
m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot, m_current_frame->descriptor_set); - m_program->bind_buffer(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(m_raster_env_buffer_info, binding_table.rasterizer_env_bind_slot, m_current_frame->descriptor_set); - - if (!m_shader_interpreter.is_interpreter(m_program)) + if (m_vs_binding_table->cbuf_location != umax) { - m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_vertex_constants_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cbuf_location); } - else + + if (m_shader_interpreter.is_interpreter(m_program)) { - m_program->bind_buffer(m_vertex_instructions_buffer_info, m_shader_interpreter.get_vertex_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); - m_program->bind_buffer(m_fragment_instructions_buffer_info, m_shader_interpreter.get_fragment_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_uniform(m_vertex_instructions_buffer_info, vk::glsl::binding_set_index_vertex, m_shader_interpreter.get_vertex_instruction_location()); + m_program->bind_uniform(m_fragment_instructions_buffer_info, vk::glsl::binding_set_index_fragment, m_shader_interpreter.get_fragment_instruction_location()); + } + else if (m_fs_binding_table->cbuf_location != umax) + { + m_program->bind_uniform(m_fragment_constants_buffer_info, vk::glsl::binding_set_index_fragment, 
m_fs_binding_table->cbuf_location); } if (vk::emulate_conditional_rendering()) { auto predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer(*m_current_command_buffer, 4)->value; - m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_uniform({ predicate, 0, 4 }, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cr_pred_buffer_location); } if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) { - m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); - m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_uniform(m_instancing_indirection_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->instanced_lut_buffer_location); + m_program->bind_uniform(m_instancing_constants_array_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->instanced_cbuf_location); } // Clear flags @@ -2155,6 +2149,19 @@ void VKGSRender::load_program_env() m_graphics_state.clear(handled_flags); } +std::pair VKGSRender::get_binding_table() const +{ + ensure(m_program); + + if (!m_shader_interpreter.is_interpreter(m_program)) + { + return { &m_vertex_prog->binding_table, &m_fragment_prog->binding_table }; + } + + const auto& [vs, fs] = m_shader_interpreter.get_shaders(); + return { &vs->binding_table, &fs->binding_table }; +} + bool VKGSRender::is_current_program_interpreted() const { return m_program && m_shader_interpreter.is_interpreter(m_program); @@ -2215,7 +2222,7 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ vkCmdPushConstants( *m_current_command_buffer, - m_pipeline_layout, 
+ m_program->layout(), VK_SHADER_STAGE_VERTEX_BIT, 0, data_length, diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index e16d8d1afa..107da8ebf2 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -26,6 +26,9 @@ using namespace vk::vmm_allocation_pool_; // clang workaround. using namespace vk::upscaling_flags_; // ditto +using vs_binding_table_t = decltype(VKVertexProgram::binding_table); +using fs_binding_table_t = decltype(VKFragmentProgram::binding_table); + namespace vk { using host_data_t = rsx::host_gpu_context_t; @@ -53,6 +56,9 @@ private: vk::glsl::program *m_prev_program = nullptr; vk::pipeline_props m_pipeline_properties; + const vs_binding_table_t* m_vs_binding_table = nullptr; + const fs_binding_table_t* m_fs_binding_table = nullptr; + vk::texture_cache m_texture_cache; vk::surface_cache m_rtts; @@ -78,6 +84,8 @@ private: VkDependencyInfoKHR m_async_compute_dependency_info {}; VkMemoryBarrier2KHR m_async_compute_memory_barrier {}; + std::pair get_binding_table() const; + public: //vk::fbo draw_fbo; std::unique_ptr m_vertex_cache; @@ -105,11 +113,6 @@ private: vk::command_buffer_chunk* m_current_command_buffer = nullptr; std::unique_ptr m_host_object_data; - - vk::descriptor_pool m_descriptor_pool; - VkDescriptorSetLayout m_descriptor_layouts = VK_NULL_HANDLE; - VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; - vk::framebuffer_holder* m_draw_fbo = nullptr; sizeu m_swapchain_dims{}; @@ -220,8 +223,6 @@ private: void update_draw_state(); void check_present_status(); - VkDescriptorSet allocate_descriptor_set(); - vk::vertex_upload_info upload_vertex_data(); rsx::simple_array m_scratch_mem; diff --git a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp index acd4c42cb2..8f38378f52 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp +++ b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp @@ -178,8 +178,6 @@ namespace vk VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE; VkSemaphore 
present_wait_semaphore = VK_NULL_HANDLE; - vk::descriptor_set descriptor_set; - rsx::flags32_t flags = 0; u32 present_image = -1; @@ -193,7 +191,6 @@ namespace vk { present_wait_semaphore = other.present_wait_semaphore; acquire_signal_semaphore = other.acquire_signal_semaphore; - descriptor_set.swap(other.descriptor_set); flags = other.flags; heap_snapshot = other.heap_snapshot; } diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index f4e7d092f9..34ca64ca66 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ -43,106 +43,46 @@ namespace vk if (!m_vao.heap) { m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, "overlays VAO", 128); + } + + if (!m_ubo.heap && m_num_uniform_buffers > 0) + { m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, "overlays UBO", 128); } } - void overlay_pass::init_descriptors() - { - rsx::simple_array descriptor_pool_sizes = {}; - - if (m_num_uniform_buffers) - { - descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_num_uniform_buffers }); - }; - - if (m_num_usable_samplers) - { - descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_num_usable_samplers }); - } - - if (m_num_input_attachments) - { - descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, m_num_input_attachments }); - } - - // Reserve descriptor pools - m_descriptor_pool.create(*m_device, descriptor_pool_sizes); - - const auto num_bindings = m_num_uniform_buffers + m_num_usable_samplers + m_num_input_attachments; - rsx::simple_array bindings(num_bindings); - u32 binding_slot = 0; - - for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding_slot) - { - bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - bindings[binding_slot].descriptorCount = 1; - bindings[binding_slot].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[binding_slot].binding = binding_slot; - 
bindings[binding_slot].pImmutableSamplers = nullptr; - } - - for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding_slot) - { - bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[binding_slot].descriptorCount = 1; - bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[binding_slot].binding = binding_slot; - bindings[binding_slot].pImmutableSamplers = nullptr; - } - - for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding_slot) - { - bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; - bindings[binding_slot].descriptorCount = 1; - bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[binding_slot].binding = binding_slot; - bindings[binding_slot].pImmutableSamplers = nullptr; - } - - ensure(binding_slot == num_bindings); - m_descriptor_layout = vk::descriptors::create_layout(bindings); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &m_descriptor_layout; - - std::vector push_constants = get_push_constants(); - if (!push_constants.empty()) - { - layout_info.pushConstantRangeCount = u32(push_constants.size()); - layout_info.pPushConstantRanges = push_constants.data(); - } - - CHECK_RESULT(vkCreatePipelineLayout(*m_device, &layout_info, nullptr, &m_pipeline_layout)); - } - std::vector overlay_pass::get_vertex_inputs() { check_heap(); - return{}; + return {}; } std::vector overlay_pass::get_fragment_inputs() { - std::vector fs_inputs; + using namespace vk::glsl; + + std::vector fs_inputs; u32 binding = 0; for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding) { const std::string name = std::string("static_data") + (n > 0 ? 
std::to_string(n) : ""); - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, name }); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_uniform_buffer, 0, 0); + fs_inputs.push_back(input); } for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding) { - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, binding, "fs" + std::to_string(n) }); + const std::string name = "fs" + std::to_string(n); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, 0, binding); + fs_inputs.push_back(input); } for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding) { - fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, binding, "sp" + std::to_string(n) }); + const std::string name = "sp" + std::to_string(n); + const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, 0, binding); + fs_inputs.push_back(input); } return fs_inputs; @@ -208,20 +148,20 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = m_pipeline_layout; + info.layout = VK_NULL_HANDLE; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = render_pass; auto compiler = vk::get_pipe_compiler(); - auto program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, get_vertex_inputs(), get_fragment_inputs()); + auto program = compiler->compile(info, vk::pipe_compiler::COMPILE_INLINE, {}, get_vertex_inputs(), get_fragment_inputs()); auto result = program.get(); m_program_cache[storage_key] = std::move(program); return result; } - void 
overlay_pass::load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src) + vk::glsl::program* overlay_pass::load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src) { vk::glsl::program *program = nullptr; const auto key = get_pipeline_key(pass); @@ -232,8 +172,6 @@ namespace vk else program = build_pipeline(key, pass); - m_descriptor_set = m_descriptor_pool.allocate(m_descriptor_layout); - if (!m_sampler && !src.empty()) { m_sampler = std::make_unique(*m_device, @@ -245,21 +183,23 @@ namespace vk if (m_num_uniform_buffers > 0) { - program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set); + program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, 0); } for (uint n = 0; n < src.size(); ++n) { VkDescriptorImageInfo info = { m_sampler->value, src[n]->value, src[n]->image()->current_layout }; - program->bind_uniform(info, "fs" + std::to_string(n), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set); + const auto [set, location] = program->get_uniform_location(::glsl::glsl_fragment_program, glsl::input_type_texture, "fs" + std::to_string(n)); + program->bind_uniform(info, set, location); } - vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, program->pipeline); - m_descriptor_set.bind(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout); + program->bind(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS); VkBuffer buffers = m_vao.heap->value; VkDeviceSize offsets = m_vao_offset; vkCmdBindVertexBuffers(cmd, 0, 1, &buffers, &offsets); + + return program; } void overlay_pass::create(const vk::render_device& dev) @@ -267,8 +207,6 @@ namespace vk if (!initialized) { m_device = &dev; - init_descriptors(); - initialized = true; } } @@ -282,10 +220,6 @@ namespace vk m_program_cache.clear(); m_sampler.reset(); - vkDestroyDescriptorSetLayout(*m_device, m_descriptor_layout, nullptr); - vkDestroyPipelineLayout(*m_device, m_pipeline_layout, nullptr); - 
m_descriptor_pool.destroy(); - initialized = false; } } @@ -303,7 +237,7 @@ namespace vk return vk::get_framebuffer(dev, target->width(), target->height(), m_num_input_attachments > 0, render_pass, { target }); } - void overlay_pass::emit_geometry(vk::command_buffer& cmd) + void overlay_pass::emit_geometry(vk::command_buffer& cmd, glsl::program* /*program*/) { vkCmdDraw(cmd, num_drawable_elements, 1, first_vertex, 0); } @@ -328,11 +262,11 @@ namespace vk // This call clobbers dynamic state cmd.flags |= vk::command_buffer::cb_reload_dynamic_state; - load_program(cmd, render_pass, src); + auto program = load_program(cmd, render_pass, src); set_up_viewport(cmd, viewport.x1, viewport.y1, viewport.width(), viewport.height()); vk::begin_renderpass(cmd, render_pass, fbo->value, { positionu{0u, 0u}, sizeu{fbo->width(), fbo->height()} }); - emit_geometry(cmd); + emit_geometry(cmd, program); } void overlay_pass::run(vk::command_buffer& cmd, const areau& viewport, vk::image* target, const std::vector& src, VkRenderPass render_pass) @@ -376,6 +310,7 @@ namespace vk // 2 input textures m_num_usable_samplers = 2; + m_num_uniform_buffers = 0; renderpass_config.set_attachment_count(1); renderpass_config.set_color_mask(0, true, true, true, true); @@ -550,24 +485,39 @@ namespace vk false, true, desc->get_data(), owner_uid); } - std::vector ui_overlay_renderer::get_push_constants() + std::vector ui_overlay_renderer::get_vertex_inputs() { - return - { - { - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .offset = 0, - .size = 68 - }, - { - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .offset = 68, - .size = 12 - } - }; + auto result = overlay_pass::get_vertex_inputs(); + result.push_back( + glsl::program_input::make( + ::glsl::glsl_vertex_program, + "push_constants", + glsl::input_type_push_constant, + 0, + 0, + glsl::push_constant_ref { .size = 68 } + ) + ); + return result; } - void ui_overlay_renderer::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) + 
std::vector ui_overlay_renderer::get_fragment_inputs() + { + auto result = overlay_pass::get_fragment_inputs(); + result.push_back( + glsl::program_input::make( + ::glsl::glsl_fragment_program, + "push_constants", + glsl::input_type_push_constant, + 0, + 0, + glsl::push_constant_ref {.offset = 68, .size = 12 } + ) + ); + return result; + } + + void ui_overlay_renderer::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) { // Byte Layout // 00: vec4 ui_scale; @@ -600,7 +550,7 @@ namespace vk .get(); push_buf[16] = std::bit_cast(vert_config); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 68, push_buf); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_VERTEX_BIT, 0, 68, push_buf); // 2. Fragment stuff rsx::overlays::fragment_options frag_opts; @@ -614,7 +564,7 @@ namespace vk push_buf[1] = m_time; push_buf[2] = m_blur_strength; - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 68, 12, push_buf); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 68, 12, push_buf); } void ui_overlay_renderer::set_primitive_type(rsx::overlays::primitive_type type) @@ -641,7 +591,7 @@ namespace vk } } - void ui_overlay_renderer::emit_geometry(vk::command_buffer& cmd) + void ui_overlay_renderer::emit_geometry(vk::command_buffer& cmd, glsl::program* program) { if (m_current_primitive_type == rsx::overlays::primitive_type::quad_list) { @@ -657,7 +607,7 @@ namespace vk } else { - overlay_pass::emit_geometry(cmd); + overlay_pass::emit_geometry(cmd, program); } } @@ -759,22 +709,30 @@ namespace vk // Disable samplers m_num_usable_samplers = 0; + // Disable UBOs + m_num_uniform_buffers = 0; + renderpass_config.set_depth_mask(false); renderpass_config.set_color_mask(0, true, true, true, true); renderpass_config.set_attachment_count(1); } - std::vector attachment_clear_pass::get_push_constants() + std::vector attachment_clear_pass::get_vertex_inputs() { - VkPushConstantRange constant; - 
constant.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - constant.offset = 0; - constant.size = 32; - - return { constant }; + check_heap(); + return + { + vk::glsl::program_input::make( + ::glsl::glsl_vertex_program, + "push_constants", + vk::glsl::input_type_push_constant, + 0, + 0, + glsl::push_constant_ref{ .size = 32 }) + }; } - void attachment_clear_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) + void attachment_clear_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) { f32 data[8]; data[0] = clear_color.r; @@ -786,7 +744,7 @@ namespace vk data[6] = colormask.b; data[7] = colormask.a; - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 32, data); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_VERTEX_BIT, 0, 32, data); } void attachment_clear_pass::set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h) @@ -850,6 +808,9 @@ namespace vk "{\n" " out_color = vec4(0.);\n" "}\n"; + + m_num_uniform_buffers = 0; + m_num_usable_samplers = 0; } void stencil_clear_pass::set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h) @@ -898,7 +859,7 @@ namespace vk std::pair repl_list[] = { - { "%sampler_binding", fmt::format("(%d + x)", sampler_location(0)) }, + { "%sampler_binding", "x" }, { "%set_decorator", "set=0" }, }; fs_src = fmt::replace_all(fs_src, repl_list); @@ -908,21 +869,28 @@ namespace vk renderpass_config.set_attachment_count(1); m_num_usable_samplers = 2; + m_num_uniform_buffers = 0; } - std::vector video_out_calibration_pass::get_push_constants() + std::vector video_out_calibration_pass::get_fragment_inputs() { - VkPushConstantRange constant; - constant.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - constant.offset = 0; - constant.size = 16; - - return { constant }; + auto result = overlay_pass::get_fragment_inputs(); + result.push_back( + vk::glsl::program_input::make( + ::glsl::glsl_fragment_program, + "push_constants", + 
vk::glsl::input_type_push_constant, + 0, + 0, + glsl::push_constant_ref{ .size = 16 } + ) + ); + return result; } - void video_out_calibration_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) + void video_out_calibration_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) { - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, config.data); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, config.data); } void video_out_calibration_pass::run(vk::command_buffer& cmd, const areau& viewport, vk::framebuffer* target, diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index 7308a5c894..a968f706a1 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -44,11 +44,6 @@ namespace vk vk::glsl::shader m_vertex_shader; vk::glsl::shader m_fragment_shader; - vk::descriptor_pool m_descriptor_pool; - descriptor_set m_descriptor_set; - VkDescriptorSetLayout m_descriptor_layout = nullptr; - VkPipelineLayout m_pipeline_layout = nullptr; - VkFilter m_sampler_filter = VK_FILTER_LINEAR; u32 m_num_usable_samplers = 1; u32 m_num_input_attachments = 0; @@ -83,8 +78,6 @@ namespace vk void check_heap(); - void init_descriptors(); - virtual void update_uniforms(vk::command_buffer& /*cmd*/, vk::glsl::program* /*program*/) {} virtual std::vector get_vertex_inputs(); @@ -92,11 +85,6 @@ namespace vk virtual void get_dynamic_state_entries(std::vector& /*state_descriptors*/) {} - virtual std::vector get_push_constants() - { - return {}; - } - int sampler_location(int index) const { return 1 + index; } int input_attachment_location(int index) const { return 1 + m_num_usable_samplers + index; } @@ -113,8 +101,7 @@ namespace vk } vk::glsl::program* build_pipeline(u64 storage_key, VkRenderPass render_pass); - - void load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src); + vk::glsl::program* 
load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src); virtual void create(const vk::render_device& dev); virtual void destroy(); @@ -123,7 +110,7 @@ namespace vk vk::framebuffer* get_framebuffer(vk::image* target, VkRenderPass render_pass); - virtual void emit_geometry(vk::command_buffer& cmd); + virtual void emit_geometry(vk::command_buffer& cmd, glsl::program* program); virtual void set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h); @@ -169,13 +156,14 @@ namespace vk vk::image_view* find_font(rsx::overlays::font* font, vk::command_buffer& cmd, vk::data_heap& upload_heap); vk::image_view* find_temp_image(rsx::overlays::image_info_base* desc, vk::command_buffer& cmd, vk::data_heap& upload_heap, u32 owner_uid); - std::vector get_push_constants() override; + std::vector get_vertex_inputs() override; + std::vector get_fragment_inputs() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override; void set_primitive_type(rsx::overlays::primitive_type type); - void emit_geometry(vk::command_buffer& cmd) override; + void emit_geometry(vk::command_buffer& cmd, glsl::program* program) override; void run(vk::command_buffer& cmd, const areau& viewport, vk::framebuffer* target, VkRenderPass render_pass, vk::data_heap& upload_heap, rsx::overlays::overlay& ui); @@ -189,7 +177,7 @@ namespace vk attachment_clear_pass(); - std::vector get_push_constants() override; + std::vector get_vertex_inputs() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override; @@ -227,7 +215,7 @@ namespace vk video_out_calibration_pass(); - std::vector get_push_constants() override; + std::vector get_fragment_inputs() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) override; diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp index 52742e1241..26e0b64098 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp 
+++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp @@ -36,12 +36,12 @@ namespace vk { if (job.is_graphics_job) { - auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.pipe_layout, job.inputs, {}); + auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.inputs, {}, job.flags); job.callback_func(compiled); } else { - auto compiled = int_compile_compute_pipe(job.compute_data, job.pipe_layout); + auto compiled = int_compile_compute_pipe(job.compute_data, job.inputs, job.flags); job.callback_func(compiled); } } @@ -50,25 +50,33 @@ namespace vk } } - std::unique_ptr pipe_compiler::int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout) + std::unique_ptr pipe_compiler::int_compile_compute_pipe( + const VkComputePipelineCreateInfo& create_info, + const std::vector& cs_inputs, + op_flags flags) { - VkPipeline pipeline; - vkCreateComputePipelines(*g_render_device, nullptr, 1, &create_info, nullptr, &pipeline); - return std::make_unique(*m_device, pipeline, pipe_layout); + auto program = std::make_unique(*m_device, create_info, cs_inputs); + program->link(flags & SEPARATE_SHADER_OBJECTS); + return program; } - std::unique_ptr pipe_compiler::int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout, - const std::vector& vs_inputs, const std::vector& fs_inputs) + std::unique_ptr pipe_compiler::int_compile_graphics_pipe( + const VkGraphicsPipelineCreateInfo& create_info, + const std::vector& vs_inputs, + const std::vector& fs_inputs, + op_flags flags) { - VkPipeline pipeline; - CHECK_RESULT(vkCreateGraphicsPipelines(*m_device, VK_NULL_HANDLE, 1, &create_info, nullptr, &pipeline)); - auto result = std::make_unique(*m_device, pipeline, pipe_layout, vs_inputs, fs_inputs); - result->link(); - return result; + auto program = std::make_unique(*m_device, create_info, vs_inputs, fs_inputs); + program->link(flags & 
SEPARATE_SHADER_OBJECTS); + return program; } - std::unique_ptr pipe_compiler::int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout, - const std::vector& vs_inputs, const std::vector& fs_inputs) + std::unique_ptr pipe_compiler::int_compile_graphics_pipe( + const vk::pipeline_props &create_info, + VkShaderModule modules[2], + const std::vector& vs_inputs, + const std::vector& fs_inputs, + op_flags flags) { VkPipelineShaderStageCreateInfo shader_stages[2] = {}; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -157,52 +165,54 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = pipe_layout; + info.layout = VK_NULL_HANDLE; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = vk::get_renderpass(*m_device, create_info.renderpass_key); - return int_compile_graphics_pipe(info, pipe_layout, vs_inputs, fs_inputs); + return int_compile_graphics_pipe(info, vs_inputs, fs_inputs, flags); } std::unique_ptr pipe_compiler::compile( const VkComputePipelineCreateInfo& create_info, - VkPipelineLayout pipe_layout, - op_flags flags, callback_t callback) + op_flags flags, callback_t callback, + const std::vector& cs_inputs) { - if (flags == COMPILE_INLINE) + if (flags & COMPILE_INLINE) { - return int_compile_compute_pipe(create_info, pipe_layout); + return int_compile_compute_pipe(create_info, cs_inputs, flags); } - m_work_queue.push(create_info, pipe_layout, callback); + m_work_queue.push(create_info, cs_inputs, flags, callback); return {}; } std::unique_ptr pipe_compiler::compile( const VkGraphicsPipelineCreateInfo& create_info, - VkPipelineLayout pipe_layout, op_flags flags, callback_t /*callback*/, - const std::vector& vs_inputs, const std::vector& fs_inputs) + const std::vector& vs_inputs, + const std::vector& fs_inputs) { // It is very inefficient to defer this as all pointers 
need to be saved - ensure(flags == COMPILE_INLINE); - return int_compile_graphics_pipe(create_info, pipe_layout, vs_inputs, fs_inputs); + ensure(flags & COMPILE_INLINE); + return int_compile_graphics_pipe(create_info, vs_inputs, fs_inputs, flags); } std::unique_ptr pipe_compiler::compile( - const vk::pipeline_props& create_info, - VkShaderModule module_handles[2], - VkPipelineLayout pipe_layout, + const vk::pipeline_props &create_info, + VkShaderModule vs, + VkShaderModule fs, op_flags flags, callback_t callback, - const std::vector& vs_inputs, const std::vector& fs_inputs) + const std::vector& vs_inputs, + const std::vector& fs_inputs) { - if (flags == COMPILE_INLINE) + VkShaderModule modules[] = { vs, fs }; + if (flags & COMPILE_INLINE) { - return int_compile_graphics_pipe(create_info, module_handles, pipe_layout, vs_inputs, fs_inputs); + return int_compile_graphics_pipe(create_info, modules, vs_inputs, fs_inputs, flags); } - m_work_queue.push(create_info, pipe_layout, module_handles, vs_inputs, fs_inputs, callback); + m_work_queue.push(create_info, modules, vs_inputs, fs_inputs, flags, callback); return {}; } diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h index 836bc5f14f..762e8aadfc 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h @@ -53,13 +53,16 @@ namespace vk class pipe_compiler { public: - enum op_flags + enum op_flag_bits { COMPILE_DEFAULT = 0, COMPILE_INLINE = 1, - COMPILE_DEFERRED = 2 + COMPILE_DEFERRED = 2, + SEPARATE_SHADER_OBJECTS = 4 }; + using op_flags = rsx::flags32_t; + using callback_t = std::function&)>; pipe_compiler(); @@ -68,21 +71,20 @@ namespace vk void initialize(const vk::render_device* pdev); std::unique_ptr compile( - const VkComputePipelineCreateInfo& create_info, - VkPipelineLayout pipe_layout, - op_flags flags, callback_t callback = {}); + const VkComputePipelineCreateInfo& cs, + op_flags flags, callback_t callback = {}, + const std::vector& 
cs_inputs = {}); std::unique_ptr compile( const VkGraphicsPipelineCreateInfo& create_info, - VkPipelineLayout pipe_layout, op_flags flags, callback_t callback = {}, const std::vector& vs_inputs = {}, const std::vector& fs_inputs = {}); std::unique_ptr compile( const vk::pipeline_props &create_info, - VkShaderModule module_handles[2], - VkPipelineLayout pipe_layout, + VkShaderModule vs, + VkShaderModule fs, op_flags flags, callback_t callback = {}, const std::vector& vs_inputs = {}, const std::vector& fs_inputs = {}); @@ -112,24 +114,25 @@ namespace vk vk::pipeline_props graphics_data; compute_pipeline_props compute_data; - VkPipelineLayout pipe_layout; VkShaderModule graphics_modules[2]; std::vector inputs; + op_flags flags; + pipe_compiler_job( const vk::pipeline_props& props, - VkPipelineLayout layout, VkShaderModule modules[2], const std::vector& vs_in, const std::vector& fs_in, + op_flags flags_, callback_t func) { callback_func = func; graphics_data = props; - pipe_layout = layout; graphics_modules[0] = modules[0]; graphics_modules[1] = modules[1]; is_graphics_job = true; + flags = flags_; inputs.reserve(vs_in.size() + fs_in.size()); inputs.insert(inputs.end(), vs_in.begin(), vs_in.end()); @@ -138,24 +141,42 @@ namespace vk pipe_compiler_job( const VkComputePipelineCreateInfo& props, - VkPipelineLayout layout, + const std::vector& cs_in, + op_flags flags_, callback_t func) { callback_func = func; compute_data = props; - pipe_layout = layout; is_graphics_job = false; + flags = flags_; + + graphics_modules[0] = VK_NULL_HANDLE; + graphics_modules[1] = VK_NULL_HANDLE; + + inputs = cs_in; } }; const vk::render_device* m_device = nullptr; lf_queue m_work_queue; - std::unique_ptr int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout); - std::unique_ptr int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout, - const std::vector& vs_inputs, const std::vector& fs_inputs); - 
std::unique_ptr int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout, - const std::vector& vs_inputs, const std::vector& fs_inputs); + std::unique_ptr int_compile_compute_pipe( + const VkComputePipelineCreateInfo& create_info, + const std::vector& cs_inputs, + op_flags flags); + + std::unique_ptr int_compile_graphics_pipe( + const VkGraphicsPipelineCreateInfo& create_info, + const std::vector& vs_inputs, + const std::vector& fs_inputs, + op_flags flags); + + std::unique_ptr int_compile_graphics_pipe( + const vk::pipeline_props &create_info, + VkShaderModule modules[2], + const std::vector& vs_inputs, + const std::vector& fs_inputs, + op_flags flags); }; void initialize_pipe_compiler(int num_worker_threads = -1); diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 4f9f535a76..3e4ee000df 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -46,15 +46,16 @@ namespace vk const fragment_program_type& fragmentProgramData, const vk::pipeline_props& pipelineProperties, bool compile_async, - std::function callback, - VkPipelineLayout common_pipeline_layout) + std::function callback) { - const auto compiler_flags = compile_async ? vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE; - VkShaderModule modules[2] = { vertexProgramData.handle, fragmentProgramData.handle }; + vk::pipe_compiler::op_flags compiler_flags = compile_async ? 
vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE; + compiler_flags |= vk::pipe_compiler::SEPARATE_SHADER_OBJECTS; auto compiler = vk::get_pipe_compiler(); auto result = compiler->compile( - pipelineProperties, modules, common_pipeline_layout, + pipelineProperties, + vertexProgramData.handle, + fragmentProgramData.handle, compiler_flags, callback, vertexProgramData.uniforms, fragmentProgramData.uniforms); diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index 57174caa98..aae8407103 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -1,5 +1,6 @@ #include "stdafx.h" #include "VKProgramPipeline.h" +#include "VKResourceManager.h" #include "vkutils/descriptors.h" #include "vkutils/device.h" @@ -7,10 +8,85 @@ namespace vk { + extern const vk::render_device* get_current_renderer(); + namespace glsl { using namespace ::glsl; + bool operator == (const descriptor_slot_t& a, const VkDescriptorImageInfo& b) + { + const auto ptr = std::get_if(&a); + return !!ptr && + ptr->imageView == b.imageView && + ptr->sampler == b.sampler && + ptr->imageLayout == b.imageLayout; + } + + bool operator == (const descriptor_slot_t& a, const VkDescriptorBufferInfo& b) + { + const auto ptr = std::get_if(&a); + return !!ptr && + ptr->buffer == b.buffer && + ptr->offset == b.offset && + ptr->range == b.range; + } + + bool operator == (const descriptor_slot_t& a, const VkBufferView& b) + { + const auto ptr = std::get_if(&a); + return !!ptr && *ptr == b; + } + + VkDescriptorType to_descriptor_type(program_input_type type) + { + switch (type) + { + case input_type_uniform_buffer: + return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + case input_type_texel_buffer: + return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + case input_type_texture: + return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + case input_type_storage_buffer: + return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + case 
input_type_storage_texture: + return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + default: + fmt::throw_exception("Unexpected program input type %d", static_cast(type)); + } + } + + VkShaderStageFlags to_shader_stage_flags(::glsl::program_domain domain) + { + switch (domain) + { + case glsl_vertex_program: + return VK_SHADER_STAGE_VERTEX_BIT; + case glsl_fragment_program: + return VK_SHADER_STAGE_FRAGMENT_BIT; + case glsl_compute_program: + return VK_SHADER_STAGE_COMPUTE_BIT; + default: + fmt::throw_exception("Unexpected domain %d", static_cast(domain)); + } + } + + const char* to_string(::glsl::program_domain domain) + { + switch (domain) + { + case glsl_vertex_program: + return "vertex"; + case glsl_fragment_program: + return "fragment"; + case glsl_compute_program: + return "compute"; + default: + fmt::throw_exception("Unexpected domain %d", static_cast(domain)); + } + } + void shader::create(::glsl::program_domain domain, const std::string& source) { type = domain; @@ -23,11 +99,8 @@ namespace vk if (!spirv::compile_glsl_to_spv(m_compiled, m_source, type, ::glsl::glsl_rules_vulkan)) { - const std::string shader_type = type == ::glsl::program_domain::glsl_vertex_program ? "vertex" : - type == ::glsl::program_domain::glsl_fragment_program ? 
"fragment" : "compute"; - rsx_log.notice("%s", m_source); - fmt::throw_exception("Failed to compile %s shader", shader_type); + fmt::throw_exception("Failed to compile %s shader", to_string(type)); } VkShaderModuleCreateInfo vs_info; @@ -69,165 +142,505 @@ namespace vk return m_handle; } - void program::create_impl() + void program::init() { - linked = false; - attribute_location_mask = 0; - vertex_attributes_mask = 0; - - fs_texture_bindings.fill(~0u); - fs_texture_mirror_bindings.fill(~0u); - vs_texture_bindings.fill(~0u); + m_linked = false; } - program::program(VkDevice dev, VkPipeline p, VkPipelineLayout layout, const std::vector &vertex_input, const std::vector& fragment_inputs) - : m_device(dev), pipeline(p), pipeline_layout(layout) + program::program(VkDevice dev, const VkGraphicsPipelineCreateInfo& create_info, const std::vector &vertex_inputs, const std::vector& fragment_inputs) + : m_device(dev), m_info(create_info) { - create_impl(); - load_uniforms(vertex_input); + init(); + + load_uniforms(vertex_inputs); load_uniforms(fragment_inputs); } - program::program(VkDevice dev, VkPipeline p, VkPipelineLayout layout) - : m_device(dev), pipeline(p), pipeline_layout(layout) + program::program(VkDevice dev, const VkComputePipelineCreateInfo& create_info, const std::vector& compute_inputs) + : m_device(dev), m_info(create_info) { - create_impl(); + init(); + + load_uniforms(compute_inputs); } program::~program() { - vkDestroyPipeline(m_device, pipeline, nullptr); + vkDestroyPipeline(m_device, m_pipeline, nullptr); + + if (m_pipeline_layout) + { + vkDestroyPipelineLayout(m_device, m_pipeline_layout, nullptr); + + for (auto& set : m_sets) + { + set.destroy(); + } + } } program& program::load_uniforms(const std::vector& inputs) { - ensure(!linked); // "Cannot change uniforms in already linked program!" + ensure(!m_linked); // "Cannot change uniforms in already linked program!" 
for (auto &item : inputs) { - uniforms[item.type].push_back(item); + ensure(item.set < binding_set_index_max_enum); // Ensure we have a valid set id + ensure(item.location < 128u || item.type == input_type_push_constant); // Arbitrary limit but useful to catch possibly uninitialized values + m_sets[item.set].m_inputs[item.type].push_back(item); } return *this; } - program& program::link() + program& program::link(bool separate_objects) { - // Preprocess texture bindings - // Link step is only useful for rasterizer programs, compute programs do not need this - for (const auto &uniform : uniforms[program_input_type::input_type_texture]) - { - if (const auto name_start = uniform.name.find("tex"); name_start != umax) - { - const auto name_end = uniform.name.find("_stencil"); - const auto index_start = name_start + 3; // Skip 'tex' part - const auto index_length = (name_end != umax) ? name_end - index_start : name_end; - const auto index_part = uniform.name.substr(index_start, index_length); - const auto index = std::stoi(index_part); + auto p_graphics_info = std::get_if(&m_info); + auto p_compute_info = !p_graphics_info ? std::get_if(&m_info) : nullptr; + const bool is_graphics_pipe = p_graphics_info != nullptr; - if (name_start == 0) + if (!is_graphics_pipe) [[ likely ]] + { + // We only support compute and graphics, so disable this for compute + separate_objects = false; + } + + if (!separate_objects) + { + // Collapse all sets into set 0 if validation passed + auto& sink = m_sets[0]; + for (auto& set : m_sets) + { + if (&set == &sink) { - // Fragment texture (tex...) - if (name_end == umax) - { - // Normal texture - fs_texture_bindings[index] = uniform.location; - } - else - { - // Stencil mirror - fs_texture_mirror_bindings[index] = uniform.location; - } + continue; } - else + + for (auto& type_arr : set.m_inputs) { - // Vertex texture (vtex...) 
- vs_texture_bindings[index] = uniform.location; + if (type_arr.empty()) + { + continue; + } + + auto type = type_arr.front().type; + auto& dst = sink.m_inputs[type]; + dst.insert(dst.end(), type_arr.begin(), type_arr.end()); + + // Clear + type_arr.clear(); + } + } + + sink.validate(); + sink.init(m_device); + } + else + { + for (auto& set : m_sets) + { + for (auto& type_arr : set.m_inputs) + { + if (type_arr.empty()) + { + continue; + } + + // Real set + set.validate(); + set.init(m_device); + break; } } } - linked = true; + create_pipeline_layout(); + ensure(m_pipeline_layout); + + if (is_graphics_pipe) + { + VkGraphicsPipelineCreateInfo create_info = *p_graphics_info; + create_info.layout = m_pipeline_layout; + CHECK_RESULT(vkCreateGraphicsPipelines(m_device, nullptr, 1, &create_info, nullptr, &m_pipeline)); + } + else + { + VkComputePipelineCreateInfo create_info = *p_compute_info; + create_info.layout = m_pipeline_layout; + CHECK_RESULT(vkCreateComputePipelines(m_device, nullptr, 1, &create_info, nullptr, &m_pipeline)); + } + + m_linked = true; return *this; } bool program::has_uniform(program_input_type type, const std::string& uniform_name) { - const auto& uniform = uniforms[type]; - return std::any_of(uniform.cbegin(), uniform.cend(), [&uniform_name](const auto& u) + for (auto& set : m_sets) { - return u.name == uniform_name; - }); + const auto& uniform = set.m_inputs[type]; + return std::any_of(uniform.cbegin(), uniform.cend(), [&uniform_name](const auto& u) + { + return u.name == uniform_name; + }); + } + + return false; } - void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, VkDescriptorType type, vk::descriptor_set &set) + std::pair program::get_uniform_location(::glsl::program_domain domain, program_input_type type, const std::string& uniform_name) { - for (const auto &uniform : uniforms[program_input_type::input_type_texture]) + for (unsigned i = 0; i < ::size32(m_sets); ++i) { - if (uniform.name 
== uniform_name) + const auto& type_arr = m_sets[i].m_inputs[type]; + const auto result = std::find_if(type_arr.cbegin(), type_arr.cend(), [&](const auto& u) { - set.push(image_descriptor, type, uniform.location); - attribute_location_mask |= (1ull << uniform.location); - return; + return u.domain == domain && u.name == uniform_name; + }); + + if (result != type_arr.end()) + { + return { i, result->location }; } } - rsx_log.notice("texture not found in program: %s", uniform_name.c_str()); + return { umax, umax }; } - void program::bind_uniform(const VkDescriptorImageInfo & image_descriptor, int texture_unit, ::glsl::program_domain domain, vk::descriptor_set &set, bool is_stencil_mirror) + void program::bind_uniform(const VkDescriptorImageInfo& image_descriptor, u32 set_id, u32 binding_point) { - ensure(domain != ::glsl::program_domain::glsl_compute_program); - - u32 binding; - if (domain == ::glsl::program_domain::glsl_fragment_program) + if (m_sets[set_id].m_descriptor_slots[binding_point] == image_descriptor) { - binding = (is_stencil_mirror) ? fs_texture_mirror_bindings[texture_unit] : fs_texture_bindings[texture_unit]; - } - else - { - binding = vs_texture_bindings[texture_unit]; - } - - if (binding != ~0u) - { - set.push(image_descriptor, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, binding); - attribute_location_mask |= (1ull << binding); return; } - rsx_log.notice("texture not found in program: %stex%u", (domain == ::glsl::program_domain::glsl_vertex_program)? 
"v" : "", texture_unit); + m_sets[set_id].notify_descriptor_slot_updated(binding_point, image_descriptor); } - void program::bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, vk::descriptor_set &set) + void program::bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 set_id, u32 binding_point) { - bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, set); - } - - void program::bind_uniform(const VkBufferView &buffer_view, u32 binding_point, vk::descriptor_set &set) - { - set.push(buffer_view, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, binding_point); - attribute_location_mask |= (1ull << binding_point); - } - - void program::bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, vk::descriptor_set &set) - { - for (const auto &uniform : uniforms[type]) + if (m_sets[set_id].m_descriptor_slots[binding_point] == buffer_descriptor) { - if (uniform.name == binding_name) + return; + } + + m_sets[set_id].notify_descriptor_slot_updated(binding_point, buffer_descriptor); + } + + void program::bind_uniform(const VkBufferView &buffer_view, u32 set_id, u32 binding_point) + { + if (m_sets[set_id].m_descriptor_slots[binding_point] == buffer_view) + { + return; + } + + m_sets[set_id].notify_descriptor_slot_updated(binding_point, buffer_view); + } + + void program::bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, int count, u32 set_id, u32 binding_point) + { + // Non-caching write + auto& set = m_sets[set_id]; + auto& arr = set.m_scratch_images_array; + + descriptor_array_ref_t data + { + .first = arr.size(), + .count = static_cast(count) + }; + + arr.reserve(arr.size() + static_cast(count)); + for (int i = 0; i < count; ++i) + { + arr.push_back(image_descriptors[i]); + } + + set.notify_descriptor_slot_updated(binding_point, data); + } + + void program::create_pipeline_layout() + { + ensure(!m_linked); + ensure(m_pipeline_layout == 
VK_NULL_HANDLE); + + rsx::simple_array push_constants{}; + rsx::simple_array set_layouts{}; + + for (auto& set : m_sets) + { + if (!set.m_device) { - bind_uniform(buffer_view, uniform.location, set); - return; + continue; + } + + set.create_descriptor_set_layout(); + set_layouts.push_back(set.m_descriptor_set_layout); + + for (const auto& input : set.m_inputs[input_type_push_constant]) + { + const auto& range = input.as_push_constant(); + push_constants.push_back({ + .stageFlags = to_shader_stage_flags(input.domain), + .offset = range.offset, + .size = range.size + }); } } - rsx_log.notice("vertex buffer not found in program: %s", binding_name.c_str()); + VkPipelineLayoutCreateInfo create_info + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .flags = 0, + .setLayoutCount = set_layouts.size(), + .pSetLayouts = set_layouts.data(), + .pushConstantRangeCount = push_constants.size(), + .pPushConstantRanges = push_constants.data() + }; + CHECK_RESULT(vkCreatePipelineLayout(m_device, &create_info, nullptr, &m_pipeline_layout)); } - void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type, vk::descriptor_set &set) + program& program::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point) { - set.push(buffer_descriptor, type, binding_point); - attribute_location_mask |= (1ull << binding_point); + VkDescriptorSet bind_sets[binding_set_index_max_enum]; + unsigned count = 0; + + for (auto& set : m_sets) + { + if (!set.m_device) + { + continue; + } + + bind_sets[count++] = set.commit(); // Commit variable changes and return handle to the new set + } + + vkCmdBindPipeline(cmd, bind_point, m_pipeline); + vkCmdBindDescriptorSets(cmd, bind_point, m_pipeline_layout, 0, count, bind_sets, 0, nullptr); + return *this; + } + + void descriptor_table_t::destroy() + { + if (!m_device) + { + return; + } + + if (m_descriptor_set_layout) + { + vkDestroyDescriptorSetLayout(m_device, m_descriptor_set_layout, 
nullptr); + } + + if (m_descriptor_pool) + { + m_descriptor_pool->destroy(); + m_descriptor_pool.reset(); + } + + m_device = VK_NULL_HANDLE; + } + + void descriptor_table_t::init(VkDevice dev) + { + m_device = dev; + + size_t bind_slots_count = 0; + for (auto& type_arr : m_inputs) + { + if (type_arr.empty() || type_arr.front().type == input_type_push_constant) + { + continue; + } + + bind_slots_count += type_arr.size(); + } + + m_descriptor_slots.resize(bind_slots_count); + std::memset(m_descriptor_slots.data(), 0, sizeof(descriptor_slot_t) * bind_slots_count); + + m_descriptors_dirty.resize(bind_slots_count); + std::fill(m_descriptors_dirty.begin(), m_descriptors_dirty.end(), false); + } + + VkDescriptorSet descriptor_table_t::allocate_descriptor_set() + { + if (!m_descriptor_pool) + { + create_descriptor_pool(); + } + + return m_descriptor_pool->allocate(m_descriptor_set_layout); + } + + VkDescriptorSet descriptor_table_t::commit() + { + if (!m_descriptor_set) + { + m_any_descriptors_dirty = true; + std::fill(m_descriptors_dirty.begin(), m_descriptors_dirty.end(), false); + } + + // Check if we need to actually open a new set + if (!m_any_descriptors_dirty) + { + return m_descriptor_set.value(); + } + + auto push_descriptor_slot = [this](unsigned idx) + { + const auto& slot = m_descriptor_slots[idx]; + const VkDescriptorType type = m_descriptor_types[idx]; + if (auto ptr = std::get_if(&slot)) + { + m_descriptor_set.push(*ptr, type, idx); + return; + } + + if (auto ptr = std::get_if(&slot)) + { + m_descriptor_set.push(*ptr, type, idx); + return; + } + + if (auto ptr = std::get_if(&slot)) + { + m_descriptor_set.push(*ptr, type, idx); + return; + } + + if (auto ptr = std::get_if(&slot)) + { + ensure(type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); // Only type supported at the moment + ensure((ptr->first + ptr->count) <= m_scratch_images_array.size()); + m_descriptor_set.push(m_scratch_images_array.data() + ptr->first, ptr->count, type, idx); + return; + } + + 
fmt::throw_exception("Unexpected descriptor structure at index %u", idx); + }; + + m_descriptor_set = allocate_descriptor_set(); + + for (unsigned i = 0; i < m_descriptor_slots.size(); ++i) + { + if (m_descriptors_dirty[i]) + { + // Push + push_descriptor_slot(i); + m_descriptors_dirty[i] = false; + continue; + } + + // We should copy here if possible. + // Without descriptor_buffer, the most efficient option is to just use the normal bind logic due to the pointer-based nature of the descriptor inputs and no stride. + push_descriptor_slot(i); + } + + m_descriptor_set.on_bind(); + m_any_descriptors_dirty = false; + m_scratch_images_array.clear(); + + return m_descriptor_set.value(); + } + + void descriptor_table_t::create_descriptor_set_layout() + { + ensure(m_descriptor_set_layout == VK_NULL_HANDLE); + + rsx::simple_array bindings; + bindings.reserve(16); + + m_descriptor_pool_sizes.clear(); + m_descriptor_pool_sizes.reserve(input_type_max_enum); + + std::unordered_map descriptor_type_map; + + auto descriptor_count = [](const std::string& name) -> u32 + { + const auto start = name.find_last_of("["); + if (start == std::string::npos) + { + return 1; + } + + const auto end = name.find_last_of("]"); + ensure(end != std::string::npos && start < end, "Invalid variable name"); + + const std::string array_size = name.substr(start + 1, end - start - 1); + if (const auto count = std::atoi(array_size.c_str()); + count > 0) + { + return count; + } + + return 1; + }; + + for (const auto& type_arr : m_inputs) + { + if (type_arr.empty() || type_arr.front().type == input_type_push_constant) + { + continue; + } + + VkDescriptorType type = to_descriptor_type(type_arr.front().type); + m_descriptor_pool_sizes.push_back({ .type = type }); + + for (const auto& input : type_arr) + { + VkDescriptorSetLayoutBinding binding + { + .binding = input.location, + .descriptorType = type, + .descriptorCount = descriptor_count(input.name), + .stageFlags = to_shader_stage_flags(input.domain) + }; + 
bindings.push_back(binding); + + descriptor_type_map[input.location] = type; + m_descriptor_pool_sizes.back().descriptorCount += binding.descriptorCount; + } + } + + m_descriptor_types.resize(::size32(m_descriptors_dirty)); + + for (u32 i = 0; i < ::size32(m_descriptors_dirty); ++i) + { + if (descriptor_type_map.find(i) == descriptor_type_map.end()) + { + fmt::throw_exception("Invalid input structure. Some input bindings were not declared!"); + } + m_descriptor_types[i] = descriptor_type_map[i]; + } + + m_descriptor_set_layout = vk::descriptors::create_layout(bindings); + } + + void descriptor_table_t::create_descriptor_pool() + { + m_descriptor_pool = std::make_unique(); + m_descriptor_pool->create(*vk::get_current_renderer(), m_descriptor_pool_sizes); + } + + void descriptor_table_t::validate() const + { + // Check for overlapping locations + std::set taken_locations; + + for (auto& type_arr : m_inputs) + { + if (type_arr.empty() || + type_arr.front().type == input_type_push_constant) + { + continue; + } + + for (const auto& input : type_arr) + { + ensure(taken_locations.find(input.location) == taken_locations.end(), "Overlapping input locations found."); + taken_locations.insert(input.location); + } + } } } } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index 06dbaf877f..c7faaa8c37 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -7,6 +7,7 @@ #include #include +#include namespace vk { @@ -15,18 +16,20 @@ namespace vk enum program_input_type : u32 { input_type_uniform_buffer = 0, - input_type_texel_buffer = 1, - input_type_texture = 2, - input_type_storage_buffer = 3, + input_type_texel_buffer, + input_type_texture, + input_type_storage_buffer, + input_type_storage_texture, + input_type_push_constant, - input_type_max_enum = 4 + input_type_max_enum }; struct bound_sampler { - VkFormat format; - VkImage image; - VkComponentMapping mapping; + VkFormat format = 
VK_FORMAT_UNDEFINED; + VkImage image = VK_NULL_HANDLE; + VkComponentMapping mapping{}; }; struct bound_buffer @@ -37,16 +40,50 @@ namespace vk u64 size = 0; }; + struct push_constant_ref + { + u32 offset = 0; + u32 size = 0; + }; + struct program_input { ::glsl::program_domain domain; program_input_type type; - bound_buffer as_buffer; - bound_sampler as_sampler; + using bound_data_t = std::variant; + bound_data_t bound_data; - u32 location; + u32 set = 0; + u32 location = umax; std::string name; + + inline bound_buffer& as_buffer() { return *std::get_if(&bound_data); } + inline bound_sampler& as_sampler() { return *std::get_if(&bound_data); } + inline push_constant_ref& as_push_constant() { return *std::get_if(&bound_data); } + + inline const bound_buffer& as_buffer() const { return *std::get_if(&bound_data); } + inline const bound_sampler& as_sampler() const { return *std::get_if(&bound_data); } + inline const push_constant_ref& as_push_constant() const { return *std::get_if(&bound_data); } + + static program_input make( + ::glsl::program_domain domain, + const std::string& name, + program_input_type type, + u32 set, + u32 location, + const bound_data_t& data = bound_buffer{}) + { + return program_input + { + .domain = domain, + .type = type, + .bound_data = data, + .set = set, + .location = location, + .name = name + }; + } }; class shader @@ -72,40 +109,103 @@ namespace vk VkShaderModule get_handle() const; }; + struct descriptor_array_ref_t + { + u32 first = 0; + u32 count = 0; + }; + + using descriptor_slot_t = std::variant; + + struct descriptor_table_t + { + VkDevice m_device = VK_NULL_HANDLE; + std::array, input_type_max_enum> m_inputs; + + std::unique_ptr m_descriptor_pool; + VkDescriptorSetLayout m_descriptor_set_layout = VK_NULL_HANDLE; + vk::descriptor_set m_descriptor_set{}; + rsx::simple_array m_descriptor_pool_sizes; + rsx::simple_array m_descriptor_types; + + std::vector m_descriptor_slots; + std::vector m_descriptors_dirty; + bool 
m_any_descriptors_dirty = false; + + rsx::simple_array< VkDescriptorImageInfo> m_scratch_images_array; + + void init(VkDevice dev); + void destroy(); + + void validate() const; + + void create_descriptor_set_layout(); + void create_descriptor_pool(); + + VkDescriptorSet allocate_descriptor_set(); + VkDescriptorSet commit(); + + template + inline void notify_descriptor_slot_updated(u32 slot, const T& data) + { + m_descriptors_dirty[slot] = true; + m_descriptor_slots[slot] = data; + m_any_descriptors_dirty = true; + } + }; + + enum binding_set_index : u32 + { + // For separate shader objects + binding_set_index_vertex = 0, + binding_set_index_fragment = 1, + + // Aliases + binding_set_index_compute = 0, + binding_set_index_unified = 0, + + // Meta + binding_set_index_max_enum = 2, + }; + class program { - std::array, input_type_max_enum> uniforms; - VkDevice m_device; + VkDevice m_device = VK_NULL_HANDLE; + VkPipeline m_pipeline = VK_NULL_HANDLE; + VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; - std::array fs_texture_bindings; - std::array fs_texture_mirror_bindings; - std::array vs_texture_bindings; - bool linked; + std::variant m_info; + std::array m_sets; + bool m_linked = false; - void create_impl(); + void init(); + void create_pipeline_layout(); + + program& load_uniforms(const std::vector& inputs); public: - VkPipeline pipeline; - VkPipelineLayout pipeline_layout; - u64 attribute_location_mask; - u64 vertex_attributes_mask; - program(VkDevice dev, VkPipeline p, VkPipelineLayout layout, const std::vector &vertex_input, const std::vector& fragment_inputs); - program(VkDevice dev, VkPipeline p, VkPipelineLayout layout); + program(VkDevice dev, const VkGraphicsPipelineCreateInfo& create_info, const std::vector &vertex_inputs, const std::vector& fragment_inputs); + program(VkDevice dev, const VkComputePipelineCreateInfo& create_info, const std::vector& compute_inputs); program(const program&) = delete; program(program&& other) = delete; ~program(); - 
program& load_uniforms(const std::vector& inputs); - program& link(); + program& link(bool separate_stages); + program& bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point); bool has_uniform(program_input_type type, const std::string &uniform_name); - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorType type, vk::descriptor_set &set); - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, int texture_unit, ::glsl::program_domain domain, vk::descriptor_set &set, bool is_stencil_mirror = false); - void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, vk::descriptor_set &set); - void bind_uniform(const VkBufferView &buffer_view, u32 binding_point, vk::descriptor_set &set); - void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, vk::descriptor_set &set); - void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type, vk::descriptor_set &set); + std::pair get_uniform_location(::glsl::program_domain domain, program_input_type type, const std::string& uniform_name); + + void bind_uniform(const VkDescriptorImageInfo &image_descriptor, u32 set_id, u32 binding_point); + void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 set_id, u32 binding_point); + void bind_uniform(const VkBufferView &buffer_view, u32 set_id, u32 binding_point); + void bind_uniform(const VkBufferView &buffer_view, ::glsl::program_domain domain, program_input_type type, const std::string &binding_name); + + void bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, int count, u32 set_id, u32 binding_point); + + inline VkPipelineLayout layout() const { return m_pipeline_layout; } + inline VkPipeline value() const { return m_pipeline; } }; } } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index caa85dcc84..3c3ef0acbd 
100644 --- a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -154,7 +154,7 @@ namespace vk // If we have driver support for FBO loops, set the usage flag for it. if (vk::get_current_renderer()->get_framebuffer_loops_support()) { - return { VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, 0 }; + return { VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT }; } // Workarounds to force transition to GENERAL to decompress. diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h index 7cf6631b67..23d243b032 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.h +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -16,50 +16,47 @@ namespace vk u32 cs_wave_y = 1; cs_resolve_base() - {} + { + ssbo_count = 0; + } virtual ~cs_resolve_base() {} void build(const std::string& format_prefix, bool unresolve, bool bgra_swap); - std::vector> get_descriptor_layout() override - { - return - { - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2 } - }; - } - - void declare_inputs() override + std::vector get_inputs() override { std::vector inputs = { - { + glsl::program_input::make( ::glsl::program_domain::glsl_compute_program, - vk::glsl::program_input_type::input_type_texture, - {}, {}, + "multisampled", + glsl::input_type_storage_texture, 0, - "multisampled" - }, - { + 0 + ), + + glsl::program_input::make( ::glsl::program_domain::glsl_compute_program, - vk::glsl::program_input_type::input_type_texture, - {}, {}, - 1, - "resolve" - } + "resolve", + glsl::input_type_storage_texture, + 0, + 1 + ), }; - m_program->load_uniforms(inputs); + auto result = compute_task::get_inputs(); + result.insert(result.end(), inputs.begin(), inputs.end()); + return result; } - void bind_resources() override + void bind_resources(const vk::command_buffer& /*cmd*/) override { auto msaa_view = multisampled->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_VIEW_MULTISAMPLED)); auto resolved_view = 
resolve->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_IDENTITY)); - m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, "multisampled", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); - m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, "resolve", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); + m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, 0, 0); + m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, 0, 1); } void run(const vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image) @@ -116,19 +113,23 @@ namespace vk void build(bool resolve_depth, bool resolve_stencil, bool unresolve); - std::vector get_push_constants() override + std::vector get_fragment_inputs() override { - VkPushConstantRange constant; - constant.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - constant.offset = 0; - constant.size = 16; - - return { constant }; + auto result = overlay_pass::get_fragment_inputs(); + result.push_back(glsl::program_input::make( + ::glsl::glsl_fragment_program, + "push_constants", + glsl::input_type_push_constant, + 0, + umax, + glsl::push_constant_ref{ .size = 16 } + )); + return result; } - void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) override + void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override { - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, static_parameters_width * 4, static_parameters); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 0, static_parameters_width * 4, static_parameters); } void update_sample_configuration(vk::image* msaa_image) @@ -226,16 +227,16 @@ namespace vk state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); } - void emit_geometry(vk::command_buffer& cmd) override + void 
emit_geometry(vk::command_buffer& cmd, glsl::program* program) override { vkCmdClearAttachments(cmd, 1, &clear_info, 1, ®ion); for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1) { vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, write_mask); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); - overlay_pass::emit_geometry(cmd); + overlay_pass::emit_geometry(cmd, program); } } @@ -285,16 +286,16 @@ namespace vk state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); } - void emit_geometry(vk::command_buffer& cmd) override + void emit_geometry(vk::command_buffer& cmd, glsl::program* program) override { vkCmdClearAttachments(cmd, 1, &clear_info, 1, &clear_region); for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1) { vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, write_mask); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); + vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); - overlay_pass::emit_geometry(cmd); + overlay_pass::emit_geometry(cmd, program); } } diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index 3c9188fd60..c80fb7873d 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -12,23 +12,70 @@ namespace vk { - glsl::shader* shader_interpreter::build_vs(u64 compiler_options) + u32 shader_interpreter::init(VKVertexProgram* vk_prog, u64 compiler_options) const + { + std::memset(&vk_prog->binding_table, 0xff, sizeof(vk_prog->binding_table)); + + u32 location = 0; + vk_prog->binding_table.vertex_buffers_location = location; + location += 3; + + vk_prog->binding_table.context_buffer_location = location++; + + if (vk::emulate_conditional_rendering()) + { + 
vk_prog->binding_table.cr_pred_buffer_location = location++; + } + + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) + { + vk_prog->binding_table.instanced_lut_buffer_location = location++; + vk_prog->binding_table.instanced_cbuf_location = location++; + } + else + { + vk_prog->binding_table.cbuf_location = location++; + } + + if (vk::emulate_conditional_rendering()) + { + vk_prog->binding_table.cr_pred_buffer_location = location++; + } + + // Return next index + return location; + } + + u32 shader_interpreter::init(VKFragmentProgram* vk_prog, u64 /*compiler_opt*/) const + { + std::memset(&vk_prog->binding_table, 0xff, sizeof(vk_prog->binding_table)); + + vk_prog->binding_table.context_buffer_location = 0; + vk_prog->binding_table.tex_param_location = 1; + vk_prog->binding_table.polygon_stipple_params_location = 2; + + // Return next index + return 3; + } + + VKVertexProgram* shader_interpreter::build_vs(u64 compiler_options) { ::glsl::shader_properties properties{}; properties.domain = ::glsl::program_domain::glsl_vertex_program; properties.require_lit_emulation = true; - // TODO: Extend decompiler thread - // TODO: Rename decompiler thread, it no longer spawns a thread RSXVertexProgram null_prog; std::string shader_str; ParamArray arr; - VKVertexProgram vk_prog; + + // Initialize binding layout + auto vk_prog = std::make_unique(); + m_vertex_instruction_start = init(vk_prog.get(), compiler_options); null_prog.ctrl = (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) ? 
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS : 0; - VKVertexDecompilerThread comp(null_prog, shader_str, arr, vk_prog); + VKVertexDecompilerThread comp(null_prog, shader_str, arr, *vk_prog); // Initialize compiler properties comp.properties.has_indexed_constants = true; @@ -52,6 +99,12 @@ namespace vk " uvec4 vp_instructions[];\n" "};\n\n"; + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_VTX_TEXTURES) + { + // FIXME: Unimplemented + rsx_log.todo("Vertex textures are currently not implemented for the shader interpreter."); + } + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) { builder << "#define _ENABLE_INSTANCED_CONSTANTS\n"; @@ -68,48 +121,29 @@ namespace vk builder << program_common::interpreter::get_vertex_interpreter(); const std::string s = builder.str(); - auto vs = std::make_unique(); + auto vs = &vk_prog->shader; vs->create(::glsl::program_domain::glsl_vertex_program, s); vs->compile(); - // Prepare input table - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + // Declare local inputs + auto vs_inputs = comp.get_inputs(); + vk::glsl::program_input in; - - in.location = binding_table.vertex_params_bind_slot; + in.set = 0; in.domain = ::glsl::glsl_vertex_program; - in.name = "VertexContextBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - m_vs_inputs.push_back(in); + in.location = m_vertex_instruction_start; + in.type = glsl::input_type_storage_buffer; + in.name = "VertexInstructionBlock"; + vs_inputs.push_back(in); - in.location = binding_table.vertex_buffers_first_bind_slot; - in.name = "persistent_input_stream"; - in.type = vk::glsl::input_type_texel_buffer; - m_vs_inputs.push_back(in); + vk_prog->SetInputs(vs_inputs); - in.location = binding_table.vertex_buffers_first_bind_slot + 1; - in.name = "volatile_input_stream"; - in.type = vk::glsl::input_type_texel_buffer; - m_vs_inputs.push_back(in); - - in.location = 
binding_table.vertex_buffers_first_bind_slot + 2; - in.name = "vertex_layout_stream"; - in.type = vk::glsl::input_type_texel_buffer; - m_vs_inputs.push_back(in); - - in.location = binding_table.vertex_constant_buffers_bind_slot; - in.name = "VertexConstantsBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - m_vs_inputs.push_back(in); - - // TODO: Bind textures if needed - - auto ret = vs.get(); - m_shader_cache[compiler_options].m_vs = std::move(vs); + auto ret = vk_prog.get(); + m_shader_cache[compiler_options].m_vs = std::move(vk_prog); return ret; } - glsl::shader* shader_interpreter::build_fs(u64 compiler_options) + VKFragmentProgram* shader_interpreter::build_fs(u64 compiler_options) { [[maybe_unused]] ::glsl::shader_properties properties{}; properties.domain = ::glsl::program_domain::glsl_fragment_program; @@ -120,10 +154,13 @@ namespace vk ParamArray arr; std::string shader_str; RSXFragmentProgram frag; - VKFragmentProgram vk_prog; - VKFragmentDecompilerThread comp(shader_str, arr, frag, len, vk_prog); - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + auto vk_prog = std::make_unique(); + m_fragment_instruction_start = init(vk_prog.get(), compiler_options); + m_fragment_textures_start = m_fragment_instruction_start + 1; + + VKFragmentDecompilerThread comp(shader_str, arr, frag, len, *vk_prog); + std::stringstream builder; builder << "#version 450\n" @@ -199,7 +236,7 @@ namespace vk for (int i = 0, bind_location = m_fragment_textures_start; i < 4; ++i) { - builder << "layout(set=0, binding=" << bind_location++ << ") " << "uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n"; + builder << "layout(set=1, binding=" << bind_location++ << ") " << "uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n"; } builder << "\n" @@ -211,7 +248,7 @@ namespace vk } builder << - "layout(std430, binding=" << m_fragment_instruction_start << ") readonly restrict buffer FragmentInstructionBlock\n" + 
"layout(std430, set=1, binding=" << m_fragment_instruction_start << ") readonly restrict buffer FragmentInstructionBlock\n" "{\n" " uint shader_control;\n" " uint texture_control;\n" @@ -223,182 +260,55 @@ namespace vk builder << program_common::interpreter::get_fragment_interpreter(); const std::string s = builder.str(); - auto fs = std::make_unique(); + auto fs = &vk_prog->shader; fs->create(::glsl::program_domain::glsl_fragment_program, s); fs->compile(); - // Prepare input table + // Declare local inputs + auto inputs = comp.get_inputs(); + vk::glsl::program_input in; - in.location = binding_table.fragment_constant_buffers_bind_slot; + in.set = 1; in.domain = ::glsl::glsl_fragment_program; - in.name = "FragmentConstantsBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - m_fs_inputs.push_back(in); + in.location = m_fragment_instruction_start; + in.type = glsl::input_type_storage_buffer; + in.name = "FragmentInstructionBlock"; + inputs.push_back(in); - in.location = binding_table.fragment_state_bind_slot; - in.name = "FragmentStateBuffer"; - m_fs_inputs.push_back(in); - - in.location = binding_table.fragment_texture_params_bind_slot; - in.name = "TextureParametersBuffer"; - m_fs_inputs.push_back(in); - - for (int i = 0, location = m_fragment_textures_start; i < 4; ++i, ++location) + if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES) { - in.location = location; - in.name = std::string(type_names[i]) + "_array[16]"; - m_fs_inputs.push_back(in); + for (int i = 0, location = m_fragment_textures_start; i < 4; ++i, ++location) + { + in.location = location; + in.name = std::string(type_names[i]) + "_array[16]"; + in.type = glsl::input_type_texture; + inputs.push_back(in); + } } - auto ret = fs.get(); - m_shader_cache[compiler_options].m_fs = std::move(fs); + vk_prog->SetInputs(inputs); + + auto ret = vk_prog.get(); + m_shader_cache[compiler_options].m_fs = std::move(vk_prog); return ret; } - std::pair 
shader_interpreter::create_layout(VkDevice dev) - { - const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); - auto bindings = get_common_binding_table(); - u32 idx = ::size32(bindings); - - bindings.resize(binding_table.total_descriptor_bindings); - - // Texture 1D array - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 16; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot; - bindings[idx].pImmutableSamplers = nullptr; - - m_fragment_textures_start = bindings[idx].binding; - idx++; - - // Texture 2D array - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 16; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 1; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - // Texture 3D array - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 16; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 2; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - // Texture CUBE array - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 16; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 3; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - // Vertex texture array (2D only) - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - bindings[idx].descriptorCount = 4; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 4; - bindings[idx].pImmutableSamplers = nullptr; - - idx++; - - // Vertex program ucode block - 
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 5; - bindings[idx].pImmutableSamplers = nullptr; - - m_vertex_instruction_start = bindings[idx].binding; - idx++; - - // Fragment program ucode block - bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - bindings[idx].descriptorCount = 1; - bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; - bindings[idx].binding = binding_table.textures_first_bind_slot + 6; - bindings[idx].pImmutableSamplers = nullptr; - - m_fragment_instruction_start = bindings[idx].binding; - idx++; - bindings.resize(idx); - - m_descriptor_pool_sizes = get_descriptor_pool_sizes(bindings); - - std::array push_constants; - push_constants[0].offset = 0; - push_constants[0].size = 16; - push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; - - if (vk::emulate_conditional_rendering()) - { - // Conditional render toggle - push_constants[0].size = 20; - } - - const auto set_layout = vk::descriptors::create_layout(bindings); - - VkPipelineLayoutCreateInfo layout_info = {}; - layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - layout_info.setLayoutCount = 1; - layout_info.pSetLayouts = &set_layout; - layout_info.pushConstantRangeCount = 1; - layout_info.pPushConstantRanges = push_constants.data(); - - VkPipelineLayout result; - CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); - return { set_layout, result }; - } - - void shader_interpreter::create_descriptor_pools(const vk::render_device& dev) - { - const auto max_draw_calls = dev.get_descriptor_max_draw_calls(); - m_descriptor_pool.create(dev, m_descriptor_pool_sizes, max_draw_calls); - } - void shader_interpreter::init(const vk::render_device& dev) { m_device = dev; - std::tie(m_shared_descriptor_layout, m_shared_pipeline_layout) = create_layout(dev); - 
create_descriptor_pools(dev); } void shader_interpreter::destroy() { m_program_cache.clear(); - m_descriptor_pool.destroy(); - - for (auto &fs : m_shader_cache) - { - fs.second.m_vs->destroy(); - fs.second.m_fs->destroy(); - } - m_shader_cache.clear(); - - if (m_shared_pipeline_layout) - { - vkDestroyPipelineLayout(m_device, m_shared_pipeline_layout, nullptr); - m_shared_pipeline_layout = VK_NULL_HANDLE; - } - - if (m_shared_descriptor_layout) - { - vkDestroyDescriptorSetLayout(m_device, m_shared_descriptor_layout, nullptr); - m_shared_descriptor_layout = VK_NULL_HANDLE; - } } glsl::program* shader_interpreter::link(const vk::pipeline_props& properties, u64 compiler_opt) { - glsl::shader *fs, *vs; + VKVertexProgram* vs; + VKFragmentProgram* fs; + if (auto found = m_shader_cache.find(compiler_opt); found != m_shader_cache.end()) { fs = found->second.m_fs.get(); @@ -413,12 +323,12 @@ namespace vk VkPipelineShaderStageCreateInfo shader_stages[2] = {}; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - shader_stages[0].module = vs->get_handle(); + shader_stages[0].module = vs->shader.get_handle(); shader_stages[0].pName = "main"; shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - shader_stages[1].module = fs->get_handle(); + shader_stages[1].module = fs->shader.get_handle(); shader_stages[1].pName = "main"; std::vector dynamic_state_descriptors = @@ -478,33 +388,42 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = m_shared_pipeline_layout; + info.layout = VK_NULL_HANDLE; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = vk::get_renderpass(m_device, properties.renderpass_key); auto compiler = vk::get_pipe_compiler(); - auto program = compiler->compile(info, m_shared_pipeline_layout, 
vk::pipe_compiler::COMPILE_INLINE, {}, m_vs_inputs, m_fs_inputs); + auto program = compiler->compile( + info, + vk::pipe_compiler::COMPILE_INLINE | vk::pipe_compiler::SEPARATE_SHADER_OBJECTS, + {}, + vs->uniforms, + fs->uniforms); + return program.release(); } - void shader_interpreter::update_fragment_textures(const std::array& sampled_images, vk::descriptor_set &set) + void shader_interpreter::update_fragment_textures(const std::array& sampled_images) { - const VkDescriptorImageInfo* texture_ptr = sampled_images.data(); - for (u32 i = 0, binding = m_fragment_textures_start; i < 4; ++i, ++binding, texture_ptr += 16) + // FIXME: Cannot use m_fragment_textures.start now since each interpreter has its own binding layout + auto [set, binding] = m_current_interpreter->get_uniform_location(::glsl::glsl_fragment_program, glsl::input_type_texture, "sampler1D_array[16]"); + if (binding == umax) { - set.push(texture_ptr, 16, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, binding); + return; } - } - VkDescriptorSet shader_interpreter::allocate_descriptor_set() - { - return m_descriptor_pool.allocate(m_shared_descriptor_layout); + const VkDescriptorImageInfo* texture_ptr = sampled_images.data(); + for (u32 i = 0; i < 4; ++i, ++binding, texture_ptr += 16) + { + m_current_interpreter->bind_uniform_array(texture_ptr, 16, set, binding); + } } glsl::program* shader_interpreter::get( const vk::pipeline_props& properties, - const program_hash_util::fragment_program_utils::fragment_program_metadata& metadata, + const program_hash_util::fragment_program_utils::fragment_program_metadata& fp_metadata, + const program_hash_util::vertex_program_utils::vertex_program_metadata& vp_metadata, u32 vp_ctrl, u32 fp_ctrl) { @@ -544,11 +463,12 @@ namespace vk if (fp_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT; if (fp_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) key.compiler_opt |= 
program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT; if (fp_ctrl & RSX_SHADER_CONTROL_USES_KIL) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL; - if (metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; - if (metadata.has_branch_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; - if (metadata.has_pack_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; + if (fp_metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; + if (fp_metadata.has_branch_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; + if (fp_metadata.has_pack_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; if (rsx::method_registers.polygon_stipple_enabled()) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_STIPPLING; if (vp_ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING; + if (vp_metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_VTX_TEXTURES; if (m_current_key == key) [[likely]] { @@ -585,4 +505,16 @@ namespace vk { return m_fragment_instruction_start; } + + std::pair shader_interpreter::get_shaders() const + { + if (auto found = m_shader_cache.find(m_current_key.compiler_opt); found != m_shader_cache.end()) + { + auto fs = found->second.m_fs.get(); + auto vs = found->second.m_vs.get(); + return { vs, fs }; + } + + return { nullptr, nullptr }; + } }; diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h index d359ca343e..9d934b3ffa 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h @@ -5,6 +5,9 @@ #include "vkutils/descriptors.h" #include +class 
VKVertexProgram; +class VKFragmentProgram; + namespace vk { using ::program_hash_util::fragment_program_utils; @@ -12,12 +15,7 @@ namespace vk class shader_interpreter { - std::vector m_vs_inputs; - std::vector m_fs_inputs; - VkDevice m_device = VK_NULL_HANDLE; - VkDescriptorSetLayout m_shared_descriptor_layout = VK_NULL_HANDLE; - VkPipelineLayout m_shared_pipeline_layout = VK_NULL_HANDLE; glsl::program* m_current_interpreter = nullptr; struct pipeline_key @@ -41,14 +39,12 @@ namespace vk struct shader_cache_entry_t { - std::unique_ptr m_fs; - std::unique_ptr m_vs; + std::unique_ptr m_fs; + std::unique_ptr m_vs; }; std::unordered_map, key_hasher> m_program_cache; std::unordered_map m_shader_cache; - rsx::simple_array m_descriptor_pool_sizes; - vk::descriptor_pool m_descriptor_pool; u32 m_vertex_instruction_start = 0; u32 m_fragment_instruction_start = 0; @@ -56,29 +52,32 @@ namespace vk pipeline_key m_current_key{}; - std::pair create_layout(VkDevice dev); - void create_descriptor_pools(const vk::render_device& dev); - - glsl::shader* build_vs(u64 compiler_opt); - glsl::shader* build_fs(u64 compiler_opt); + VKVertexProgram* build_vs(u64 compiler_opt); + VKFragmentProgram* build_fs(u64 compiler_opt); glsl::program* link(const vk::pipeline_props& properties, u64 compiler_opt); + u32 init(VKVertexProgram* vk_prog, u64 compiler_opt) const; + u32 init(VKFragmentProgram* vk_prog, u64 compiler_opt) const; + public: void init(const vk::render_device& dev); void destroy(); glsl::program* get( const vk::pipeline_props& properties, - const program_hash_util::fragment_program_utils::fragment_program_metadata& metadata, + const program_hash_util::fragment_program_utils::fragment_program_metadata& fp_metadata, + const program_hash_util::vertex_program_utils::vertex_program_metadata& vp_metadata, u32 vp_ctrl, u32 fp_ctrl); + // Retrieve the shader components that make up the current interpreter + std::pair get_shaders() const; + bool is_interpreter(const glsl::program* prog) 
const; u32 get_vertex_instruction_location() const; u32 get_fragment_instruction_location() const; - void update_fragment_textures(const std::array& sampled_images, vk::descriptor_set &set); - VkDescriptorSet allocate_descriptor_set(); + void update_fragment_textures(const std::array& sampled_images); }; } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 60f33f49c5..70d0972984 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -6,7 +6,6 @@ #include "vkutils/device.h" #include "../Program/GLSLCommon.h" - std::string VKVertexDecompilerThread::getFloatTypeName(usz elementCount) { return glsl::getFloatTypeNameImpl(elementCount); @@ -27,14 +26,59 @@ std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::stri return glsl::compareFunctionImpl(f, Op0, Op1, scalar); } +void VKVertexDecompilerThread::prepareBindingTable() +{ + u32 location = 0; + vk_prog->binding_table.vertex_buffers_location = location; + location += 3; // Persistent verts, volatile and layout data + + vk_prog->binding_table.context_buffer_location = location++; + if (m_device_props.emulate_conditional_rendering) + { + vk_prog->binding_table.cr_pred_buffer_location = location++; + } + + std::memset(vk_prog->binding_table.vtex_location, 0xff, sizeof(vk_prog->binding_table.vtex_location)); + + for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) + { + const bool is_texture_type = PT.type.starts_with("sampler"); + + for (const ParamItem& PI : PT.items) + { + if (is_texture_type) + { + const int id = vk::get_texture_index(PI.name); + vk_prog->binding_table.vtex_location[id] = location++; + continue; + } + + if (PI.name.starts_with("vc[")) + { + if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)) + { + vk_prog->binding_table.cbuf_location = location++; + continue; + } + + vk_prog->binding_table.instanced_lut_buffer_location = location++; + vk_prog->binding_table.instanced_cbuf_location 
= location++; + continue; + } + } + } +} + void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) { + prepareBindingTable(); + OS << "#version 450\n\n" "#extension GL_ARB_separate_shader_objects : enable\n\n"; OS << - "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n" + "layout(std140, set=0, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform VertexContextBuffer\n" "{\n" " mat4 scale_offset_mat;\n" " ivec4 user_clip_enabled[2];\n" @@ -45,13 +89,31 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) " float z_far;\n" "};\n\n"; + vk::glsl::program_input context_input = + { + .domain = glsl::glsl_vertex_program, + .type = vk::glsl::input_type_uniform_buffer, + .location = vk_prog->binding_table.context_buffer_location, + .name = "VertexContextBuffer" + }; + inputs.push_back(context_input); + if (m_device_props.emulate_conditional_rendering) { OS << - "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n" + "layout(std430, set=0, binding=" << vk_prog->binding_table.cr_pred_buffer_location << ") readonly buffer EXT_Conditional_Rendering\n" "{\n" " uint conditional_rendering_predicate;\n" "};\n\n"; + + vk::glsl::program_input predicate_input = + { + .domain = glsl::glsl_vertex_program, + .type = vk::glsl::input_type_storage_buffer, + .location = vk_prog->binding_table.cr_pred_buffer_location, + .name = "EXT_Conditional_Rendering" + }; + inputs.push_back(predicate_input); } OS << @@ -63,52 +125,50 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) " uint layout_ptr_offset;\n" " uint xform_constants_offset;\n"; + u32 push_constants_size = 5 * sizeof(u32); if (m_device_props.emulate_conditional_rendering) { + push_constants_size += sizeof(u32); OS << " uint conditional_rendering_enabled;\n"; } OS << "};\n\n"; - vk::glsl::program_input in; - in.location = m_binding_table.vertex_params_bind_slot; - in.domain = glsl::glsl_vertex_program; - in.name = 
"VertexContextBuffer"; - in.type = vk::glsl::input_type_uniform_buffer; - inputs.push_back(in); + vk::glsl::program_input push_constants = + { + .domain = glsl::glsl_vertex_program, + .type = vk::glsl::input_type_push_constant, + .bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = push_constants_size } + }; + inputs.push_back(push_constants); } void VKVertexDecompilerThread::insertInputs(std::stringstream& OS, const std::vector& /*inputs*/) { - OS << "layout(set=0, binding=5) uniform usamplerBuffer persistent_input_stream;\n"; // Data stream with persistent vertex data (cacheable) - OS << "layout(set=0, binding=6) uniform usamplerBuffer volatile_input_stream;\n"; // Data stream with per-draw data (registers and immediate draw data) - OS << "layout(set=0, binding=7) uniform usamplerBuffer vertex_layout_stream;\n"; // Data stream defining vertex data layout + static const char* input_streams[] = + { + "persistent_input_stream", // Data stream with persistent vertex data (cacheable) + "volatile_input_stream", // Data stream with per-draw data (registers and immediate draw data) + "vertex_layout_stream" // Data stream defining vertex data layout" + }; - vk::glsl::program_input in; - in.location = m_binding_table.vertex_buffers_first_bind_slot; - in.domain = glsl::glsl_vertex_program; - in.name = "persistent_input_stream"; - in.type = vk::glsl::input_type_texel_buffer; - this->inputs.push_back(in); + int location = vk_prog->binding_table.vertex_buffers_location; + for (const auto& stream : input_streams) + { + OS << "layout(set=0, binding=" << location << ") uniform usamplerBuffer " << stream << ";\n"; - in.location = m_binding_table.vertex_buffers_first_bind_slot + 1; - in.domain = glsl::glsl_vertex_program; - in.name = "volatile_input_stream"; - in.type = vk::glsl::input_type_texel_buffer; - this->inputs.push_back(in); - - in.location = m_binding_table.vertex_buffers_first_bind_slot + 2; - in.domain = glsl::glsl_vertex_program; - in.name = 
"vertex_layout_stream"; - in.type = vk::glsl::input_type_texel_buffer; - this->inputs.push_back(in); + vk::glsl::program_input in; + in.location = location++; + in.domain = glsl::glsl_vertex_program; + in.name = stream; + in.type = vk::glsl::input_type_texel_buffer; + this->inputs.push_back(in); + } } void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) { vk::glsl::program_input in; - u32 location = m_binding_table.vertex_textures_first_bind_slot; - for (const ParamType &PT : constants) { for (const ParamItem &PI : PT.items) @@ -117,12 +177,12 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std { if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)) { - OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.vertex_constant_buffers_bind_slot) << ") readonly buffer VertexConstantsBuffer\n"; + OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.cbuf_location << ") readonly buffer VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 vc[];\n"; OS << "};\n\n"; - in.location = m_binding_table.vertex_constant_buffers_bind_slot; + in.location = vk_prog->binding_table.cbuf_location; in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_storage_buffer; @@ -133,26 +193,26 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std else { // 1. 
Bind indirection lookup buffer - OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.instancing_lookup_table_bind_slot) << ") readonly buffer InstancingData\n"; + OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.instanced_lut_buffer_location << ") readonly buffer InstancingData\n"; OS << "{\n"; OS << " int constants_addressing_lookup[];\n"; OS << "};\n\n"; - in.location = m_binding_table.instancing_lookup_table_bind_slot; + in.location = vk_prog->binding_table.instanced_lut_buffer_location; in.domain = glsl::glsl_vertex_program; in.name = "InstancingData"; in.type = vk::glsl::input_type_storage_buffer; inputs.push_back(in); // 2. Bind actual constants buffer - OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.instancing_constants_buffer_slot) << ") readonly buffer VertexConstantsBuffer\n"; + OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.instanced_cbuf_location << ") readonly buffer VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 instanced_constants_array[];\n"; OS << "};\n\n"; OS << "#define CONSTANTS_ARRAY_LENGTH " << (properties.has_indexed_constants ? 
468 : ::size32(m_constant_ids)) << "\n\n"; - in.location = m_binding_table.instancing_constants_buffer_slot; + in.location = vk_prog->binding_table.instanced_cbuf_location; in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_storage_buffer; @@ -161,12 +221,10 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std } } - if (PT.type == "sampler2D" || - PT.type == "samplerCube" || - PT.type == "sampler1D" || - PT.type == "sampler3D") + if (PT.type.starts_with("sampler")) { - in.location = location; + const int id = vk::get_texture_index(PI.name); + in.location = vk_prog->binding_table.vtex_location[id]; in.name = PI.name; in.type = vk::glsl::input_type_texture; @@ -190,7 +248,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std } } - OS << "layout(set = 0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set=0, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; } } } @@ -371,8 +429,6 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) void VKVertexDecompilerThread::Task() { m_device_props.emulate_conditional_rendering = vk::emulate_conditional_rendering(); - m_binding_table = vk::g_render_device->get_pipeline_binding_table(); - m_shader = Decompile(); vk_prog->SetInputs(inputs); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h index 1bb6dfd91c..3422333fc6 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.h +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -15,7 +15,6 @@ struct VKVertexDecompilerThread : public VertexProgramDecompiler std::string &m_shader; std::vector inputs; class VKVertexProgram *vk_prog; - vk::pipeline_binding_table m_binding_table{}; struct { @@ -36,6 +35,8 @@ protected: void insertMainStart(std::stringstream &OS) override; void insertMainEnd(std::stringstream &OS) override; + void prepareBindingTable(); + 
const RSXVertexProgram &rsx_vertex_program; public: VKVertexDecompilerThread(const RSXVertexProgram &prog, std::string& shader, ParamArray&, class VKVertexProgram &dst) @@ -61,6 +62,19 @@ public: vk::glsl::shader shader; std::vector uniforms; + // Quick attribute indices + struct + { + u32 context_buffer_location = umax; // Vertex program context + u32 cr_pred_buffer_location = umax; // Conditional rendering predicate + u32 vertex_buffers_location = umax; // Vertex input streams (3) + u32 cbuf_location = umax; // Vertex program constants register file + u32 instanced_lut_buffer_location = umax; // Instancing redirection table + u32 instanced_cbuf_location = umax; // Instancing constants register file + u32 vtex_location[4]; // Vertex textures (inf) + + } binding_table; + void Decompile(const RSXVertexProgram& prog); void Compile(); void SetInputs(std::vector& inputs); diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp index c256070490..dc6562289e 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp @@ -61,6 +61,9 @@ namespace vk // Fill with 0 to avoid sending incomplete/unused variables to the GPU memset(m_constants_buf, 0, sizeof(m_constants_buf)); + // No ssbo usage + ssbo_count = 0; + // Enable push constants use_push_constants = true; push_constants_size = push_constants_size_; @@ -68,39 +71,33 @@ namespace vk create(); } - std::vector> fsr_pass::get_descriptor_layout() - { - return - { - { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1 }, - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1 } - }; - } - - void fsr_pass::declare_inputs() + std::vector fsr_pass::get_inputs() { std::vector inputs = { - { + glsl::program_input::make( ::glsl::program_domain::glsl_compute_program, - vk::glsl::program_input_type::input_type_texture, - {}, {}, + "InputTexture", + vk::glsl::input_type_texture, 0, - "InputTexture" - }, - { + 0 + ), + + glsl::program_input::make( 
::glsl::program_domain::glsl_compute_program, - vk::glsl::program_input_type::input_type_texture, - {}, {}, - 1, - "OutputTexture" - } + "OutputTexture", + vk::glsl::input_type_storage_texture, + 0, + 1 + ), }; - m_program->load_uniforms(inputs); + auto result = compute_task::get_inputs(); + result.insert(result.end(), inputs.begin(), inputs.end()); + return result; } - void fsr_pass::bind_resources() + void fsr_pass::bind_resources(const vk::command_buffer& /*cmd*/) { // Bind relevant stuff if (!m_sampler) @@ -111,8 +108,8 @@ namespace vk VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK); } - m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, "InputTexture", VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set); - m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, m_output_image->image()->current_layout }, "OutputTexture", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); + m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, 0, 0); + m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, m_output_image->image()->current_layout }, 0, 1); } void fsr_pass::run(const vk::command_buffer& cmd, vk::viewable_image* src, vk::viewable_image* dst, const size2u& input_size, const size2u& output_size) @@ -122,6 +119,11 @@ namespace vk m_input_size = input_size; m_output_size = output_size; + if (!m_program) + { + load_program(cmd); + } + configure(cmd); constexpr auto wg_size = 16; @@ -158,7 +160,7 @@ namespace vk static_cast(src_image->width()), static_cast(src_image->height()), // Size of the raw image to upscale (in case viewport does not cover it all) static_cast(m_output_size.width), static_cast(m_output_size.height)); // Size of output viewport (target size) - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 
push_constants_size, m_constants_buf); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); } rcas_pass::rcas_pass() @@ -177,7 +179,7 @@ namespace vk auto cas_attenuation = 2.f - (g_cfg.video.vk.rcas_sharpening_intensity / 50.f); FsrRcasCon(&m_constants_buf[0], cas_attenuation); - vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); + vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); } } // Namespace FidelityFX diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h b/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h index c5b5b30e73..7bff58b049 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h @@ -19,9 +19,8 @@ namespace vk size2u m_output_size; u32 m_constants_buf[20]; - std::vector> get_descriptor_layout() override; - void declare_inputs() override; - void bind_resources() override; + std::vector get_inputs() override; + void bind_resources(const vk::command_buffer&) override; virtual void configure(const vk::command_buffer& cmd) = 0; diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index 7293180e08..b5e62f33f6 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -14,44 +14,43 @@ namespace vk public: inline void flush_all() { + std::lock_guard lock(m_notifications_lock); + for (auto& set : m_notification_list) { set->flush(); } + + m_notification_list.clear(); } void register_(descriptor_set* set) { - // Rare event, upon creation of a new set tracker. 
- // Check for spurious 'new' events when the aux context is taking over - for (const auto& set_ : m_notification_list) - { - if (set_ == set) return; - } + std::lock_guard lock(m_notifications_lock); m_notification_list.push_back(set); - rsx_log.warning("[descriptor_manager::register] Now monitoring %u descriptor sets", m_notification_list.size()); + // rsx_log.notice("[descriptor_manager::register] Now monitoring %u descriptor sets", m_notification_list.size()); } void deregister(descriptor_set* set) { - for (auto it = m_notification_list.begin(); it != m_notification_list.end(); ++it) - { - if (*it == set) - { - *it = m_notification_list.back(); - m_notification_list.pop_back(); - break; - } - } + std::lock_guard lock(m_notifications_lock); - rsx_log.warning("[descriptor_manager::deregister] Now monitoring %u descriptor sets", m_notification_list.size()); + m_notification_list.erase_if(FN(x == set)); + // rsx_log.notice("[descriptor_manager::deregister] Now monitoring %u descriptor sets", m_notification_list.size()); + } + + void destroy() + { + std::lock_guard lock(m_notifications_lock); + m_notification_list.clear(); } dispatch_manager() = default; private: rsx::simple_array m_notification_list; + std::mutex m_notifications_lock; dispatch_manager(const dispatch_manager&) = delete; dispatch_manager& operator = (const dispatch_manager&) = delete; @@ -67,6 +66,11 @@ namespace vk g_fxo->get().flush_all(); } + void destroy() + { + g_fxo->get().destroy(); + } + VkDescriptorSetLayout create_layout(const rsx::simple_array& bindings) { VkDescriptorSetLayoutCreateInfo infos = {}; @@ -88,17 +92,17 @@ namespace vk } else { - binding_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT; + binding_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT; } } - binding_infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + binding_infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; binding_infos.pNext = 
nullptr; binding_infos.bindingCount = ::size32(binding_flags); binding_infos.pBindingFlags = binding_flags.data(); infos.pNext = &binding_infos; - infos.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT; + infos.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT; VkDescriptorSetLayout result; CHECK_RESULT(vkCreateDescriptorSetLayout(*g_render_device, &infos, nullptr, &result)); @@ -295,11 +299,6 @@ namespace vk m_in_use = true; m_update_after_bind_mask = g_render_device->get_descriptor_update_after_bind_support(); - - if (m_update_after_bind_mask) - { - g_fxo->get().register_(this); - } } else if (m_push_type_mask & ~m_update_after_bind_mask) { @@ -333,11 +332,6 @@ namespace vk return &m_handle; } - VkDescriptorSet descriptor_set::value() const - { - return m_handle; - } - void descriptor_set::push(const VkBufferView& buffer_view, VkDescriptorType type, u32 binding) { m_push_type_mask |= (1ull << type); @@ -417,14 +411,24 @@ namespace vk if (m_pending_copies.empty()) [[likely]] { m_pending_copies = std::move(copy_cmd); + return; } - else + + m_pending_copies += copy_cmd; + } + + void descriptor_set::push(rsx::simple_array& write_cmds, u32 type_mask) + { + m_push_type_mask |= type_mask; + +#if !defined(__clang__) || (__clang_major__ >= 16) + if (m_pending_writes.empty()) [[unlikely]] { - const auto old_size = m_pending_copies.size(); - const auto new_size = copy_cmd.size() + old_size; - m_pending_copies.resize(new_size); - std::copy(copy_cmd.begin(), copy_cmd.end(), m_pending_copies.begin() + old_size); + m_pending_writes = std::move(write_cmds); + return; } +#endif + m_pending_writes += write_cmds; } void descriptor_set::push(const descriptor_set_dynamic_offset_t& offset) @@ -438,13 +442,32 @@ namespace vk m_dynamic_offsets[offset.location] = offset.value; } - void descriptor_set::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout) + void descriptor_set::on_bind() { - if 
((m_push_type_mask & ~m_update_after_bind_mask) || (m_pending_writes.size() >= max_cache_size)) + if (!m_push_type_mask) + { + ensure(m_pending_writes.empty()); + return; + } + + // We have queued writes + if ((m_push_type_mask & ~m_update_after_bind_mask) || + (m_pending_writes.size() >= max_cache_size)) { flush(); + return; } + // Register for async flush + ensure(m_update_after_bind_mask); + g_fxo->get().register_(this); + } + + void descriptor_set::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout) + { + // Notify + on_bind(); + vkCmdBindDescriptorSets(cmd, bind_point, layout, 0, 1, &m_handle, ::size32(m_dynamic_offsets), m_dynamic_offsets.data()); } diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h index 6c61488b6e..c2cf5deb20 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h @@ -94,15 +94,19 @@ namespace vk void swap(descriptor_set& other); descriptor_set& operator = (VkDescriptorSet set); + VkDescriptorSet value() const { return m_handle; } + operator bool() const { return m_handle != VK_NULL_HANDLE; } + VkDescriptorSet* ptr(); - VkDescriptorSet value() const; void push(const VkBufferView& buffer_view, VkDescriptorType type, u32 binding); void push(const VkDescriptorBufferInfo& buffer_info, VkDescriptorType type, u32 binding); void push(const VkDescriptorImageInfo& image_info, VkDescriptorType type, u32 binding); void push(const VkDescriptorImageInfo* image_info, u32 count, VkDescriptorType type, u32 binding); void push(rsx::simple_array& copy_cmd, u32 type_mask = umax); + void push(rsx::simple_array& write_cmds, u32 type_mask = umax); void push(const descriptor_set_dynamic_offset_t& offset); + void on_bind(); void bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout); void flush(); @@ -118,7 +122,7 @@ namespace vk rsx::simple_array m_image_info_pool; rsx::simple_array m_dynamic_offsets; 
-#ifdef __clang__ +#if defined(__clang__) && (__clang_major__ < 16) // Clang (pre 16.x) does not support LWG 2089, std::construct_at for POD types struct WriteDescriptorSetT : public VkWriteDescriptorSet { @@ -158,6 +162,7 @@ namespace vk { void init(); void flush(); + void destroy(); VkDescriptorSetLayout create_layout(const rsx::simple_array& bindings); } diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp index 85d9148834..907d692e85 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp @@ -813,7 +813,6 @@ namespace vk memory_map = vk::get_memory_mapping(pdev); m_formats_support = vk::get_optimal_tiling_supported_formats(pdev); - m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev); if (g_cfg.video.disable_vulkan_mem_allocator) { @@ -1148,15 +1147,4 @@ namespace vk return result; } - - pipeline_binding_table get_pipeline_binding_table(const vk::physical_device& dev) - { - pipeline_binding_table result{}; - - // Need to check how many samplers are supported by the driver - const auto usable_samplers = std::min(dev.get_limits().maxPerStageDescriptorSampledImages, 32u); - result.vertex_textures_first_bind_slot = result.textures_first_bind_slot + usable_samplers; - result.total_descriptor_bindings = result.vertex_textures_first_bind_slot + 4; - return result; - } } diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h index 63e30d3d42..0511802aac 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.h +++ b/rpcs3/Emu/RSX/VK/vkutils/device.h @@ -137,7 +137,6 @@ namespace vk physical_device* pgpu = nullptr; memory_type_mapping memory_map{}; gpu_formats_support m_formats_support{}; - pipeline_binding_table m_pipeline_binding_table{}; std::unique_ptr m_allocator; VkDevice dev = VK_NULL_HANDLE; @@ -168,7 +167,6 @@ namespace vk const physical_device& gpu() const { return *pgpu; } const memory_type_mapping& get_memory_mapping() const { return memory_map; } const 
gpu_formats_support& get_formats_support() const { return m_formats_support; } - const pipeline_binding_table& get_pipeline_binding_table() const { return m_pipeline_binding_table; } const gpu_shader_types_support& get_shader_types_support() const { return pgpu->shader_types_support; } const custom_border_color_features& get_custom_border_color_support() const { return pgpu->custom_border_color_support; } const multidraw_features get_multidraw_support() const { return pgpu->multidraw_support; } @@ -206,7 +204,6 @@ namespace vk memory_type_mapping get_memory_mapping(const physical_device& dev); gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev); - pipeline_binding_table get_pipeline_binding_table(const physical_device& dev); extern const render_device* g_render_device; } diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index f64e01200e..916284a6cd 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -189,4 +189,29 @@ namespace rsx EXPECT_EQ(arr[i], i + 1); } } + + TEST(SimpleArray, Merge) + { + rsx::simple_array arr{ 1 }; + rsx::simple_array arr2{ 2, 3, 4, 5, 6, 7, 8, 9 }; + rsx::simple_array arr3{ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }; + + // Check small vector optimization + EXPECT_TRUE(arr.is_local_storage()); + + // Small vector optimization holds after append + arr += arr2; + EXPECT_TRUE(arr.is_local_storage()); + + // Exceed the boundary and we move into dynamic alloc + arr += arr3; + EXPECT_FALSE(arr.is_local_storage()); + + // Verify contents + EXPECT_EQ(arr.size(), 30); + for (int i = 0; i < 30; ++i) + { + EXPECT_EQ(arr[i], i + 1); + } + } }