diff --git a/rpcs3/Emu/RSX/Common/simple_array.hpp b/rpcs3/Emu/RSX/Common/simple_array.hpp index 033994547d..dfec324eeb 100644 --- a/rpcs3/Emu/RSX/Common/simple_array.hpp +++ b/rpcs3/Emu/RSX/Common/simple_array.hpp @@ -285,13 +285,6 @@ namespace rsx return pos; } - void operator += (const rsx::simple_array& that) - { - const auto old_size = _size; - resize(_size + that._size); - std::memcpy(data() + old_size, that.data(), that.size_bytes()); - } - void clear() { _size = 0; @@ -312,11 +305,6 @@ namespace rsx return _size * sizeof(Ty); } - u32 size_bytes32() const - { - return _size * sizeof(Ty); - } - u32 capacity() const { return _capacity; diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl index 84fdfdb8b7..de1992174f 100644 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/OverlayRenderFS.glsl @@ -14,8 +14,8 @@ R"( #define SAMPLER_MODE_TEXTURE2D 3 #ifdef VULKAN - layout(set=0, binding=0) uniform sampler2D fs0; - layout(set=0, binding=1) uniform sampler2DArray fs1; + layout(set=0, binding=1) uniform sampler2D fs0; + layout(set=0, binding=2) uniform sampler2DArray fs1; #else layout(binding=31) uniform sampler2D fs0; layout(binding=30) uniform sampler2DArray fs1; diff --git a/rpcs3/Emu/RSX/Program/ShaderInterpreter.h b/rpcs3/Emu/RSX/Program/ShaderInterpreter.h index f89c058dec..5503a2870c 100644 --- a/rpcs3/Emu/RSX/Program/ShaderInterpreter.h +++ b/rpcs3/Emu/RSX/Program/ShaderInterpreter.h @@ -20,9 +20,8 @@ namespace program_common COMPILER_OPT_ENABLE_KIL = (1 << 11), COMPILER_OPT_ENABLE_STIPPLING = (1 << 12), COMPILER_OPT_ENABLE_INSTANCING = (1 << 13), - COMPILER_OPT_ENABLE_VTX_TEXTURES = (1 << 14), - COMPILER_OPT_MAX = COMPILER_OPT_ENABLE_VTX_TEXTURES + COMPILER_OPT_MAX = COMPILER_OPT_ENABLE_INSTANCING }; static std::string get_vertex_interpreter() diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp 
b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp index 815492caa3..01e5cc07aa 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.cpp @@ -35,31 +35,4 @@ namespace vk fmt::throw_exception("Unknown register name: %s", varying_register_name); } - - int get_texture_index(std::string_view name) - { - if (name.length() < 2) - { - fmt::throw_exception("Invalid texture name: '%s'", name); - } - - constexpr int max_index_length = 2; - const int name_length = static_cast(name.length()); - std::string index; - - for (int char_idx = name_length - max_index_length; char_idx < name_length; ++char_idx) - { - if (std::isdigit(name[char_idx])) - { - index += name[char_idx]; - } - } - - if (index.empty()) - { - fmt::throw_exception("Invalid texture name: '%s'", name); - } - - return std::atoi(index.c_str()); - } } diff --git a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h index b17eb83b11..b0920e27f5 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h +++ b/rpcs3/Emu/RSX/VK/VKCommonDecompiler.h @@ -6,6 +6,4 @@ namespace vk using namespace ::glsl; int get_varying_register_location(std::string_view varying_register_name); - - int get_texture_index(std::string_view name); } diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp index 76cda4d253..602d855d76 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.cpp @@ -8,6 +8,157 @@ namespace vk { + rsx::simple_array get_common_binding_table() + { + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + rsx::simple_array bindings(binding_table.instancing_constants_buffer_slot + 1); + + u32 idx = 0; + + // Vertex stream, one stream for cacheable data, one stream for transient data + for (int i = 0; i < 3; i++) + { + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; + bindings[idx].descriptorCount = 1; + 
bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.vertex_buffers_first_bind_slot + i; + bindings[idx].pImmutableSamplers = nullptr; + idx++; + } + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.fragment_constant_buffers_bind_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.fragment_state_bind_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.fragment_texture_params_bind_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.vertex_constant_buffers_bind_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; + bindings[idx].binding = binding_table.vertex_params_bind_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.conditional_render_predicate_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + bindings[idx].descriptorType = 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.rasterizer_env_bind_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.instancing_lookup_table_bind_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.instancing_constants_buffer_slot; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + return bindings; + } + + std::tuple> + get_common_pipeline_layout(VkDevice dev) + { + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + auto bindings = get_common_binding_table(); + u32 idx = ::size32(bindings); + + bindings.resize(binding_table.total_descriptor_bindings); + + for (auto binding = binding_table.textures_first_bind_slot; + binding < binding_table.vertex_textures_first_bind_slot; + binding++) + { + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding; + bindings[idx].pImmutableSamplers = nullptr; + idx++; + } + + for (int i = 0; i < rsx::limits::vertex_textures_count; i++) + { + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.vertex_textures_first_bind_slot + i; + bindings[idx].pImmutableSamplers = nullptr; + idx++; + } + + ensure(idx == 
binding_table.total_descriptor_bindings); + + std::array push_constants; + push_constants[0].offset = 0; + push_constants[0].size = 20; + push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + + if (vk::emulate_conditional_rendering()) + { + // Conditional render toggle + push_constants[0].size = 24; + } + + const auto set_layout = vk::descriptors::create_layout(bindings); + + VkPipelineLayoutCreateInfo layout_info = {}; + layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + layout_info.setLayoutCount = 1; + layout_info.pSetLayouts = &set_layout; + layout_info.pushConstantRangeCount = 1; + layout_info.pPushConstantRanges = push_constants.data(); + + VkPipelineLayout result; + CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); + return std::make_tuple(result, set_layout, bindings); + } + rsx::simple_array get_descriptor_pool_sizes(const rsx::simple_array& bindings) { // Compile descriptor pool sizes diff --git a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h index e5ada45bf8..371d0ebf76 100644 --- a/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h +++ b/rpcs3/Emu/RSX/VK/VKCommonPipelineLayout.h @@ -5,6 +5,13 @@ namespace vk { + // Grab standard layout for decompiled RSX programs. Also used by the interpreter. + // FIXME: This generates a bloated monstrosity that needs to die. + std::tuple> get_common_pipeline_layout(VkDevice dev); + + // Returns the standard binding layout without texture slots. Those have special handling depending on the consumer. 
+ rsx::simple_array get_common_binding_table(); + // Returns an array of pool sizes that can be used to generate a proper descriptor pool rsx::simple_array get_descriptor_pool_sizes(const rsx::simple_array& bindings); } diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp index 3df2c9695a..637642d8bf 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.cpp +++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp @@ -8,43 +8,64 @@ namespace vk { - std::vector compute_task::get_inputs() + std::vector> compute_task::get_descriptor_layout() { - std::vector result; - for (unsigned i = 0; i < ssbo_count; ++i) - { - const auto input = glsl::program_input::make - ( - ::glsl::glsl_compute_program, - "ssbo" + std::to_string(i), - glsl::program_input_type::input_type_storage_buffer, - 0, - i - ); - result.push_back(input); - } - - if (use_push_constants && push_constants_size > 0) - { - const auto input = glsl::program_input::make - ( - ::glsl::glsl_compute_program, - "push_constants", - glsl::program_input_type::input_type_push_constant, - 0, - 0, - glsl::push_constant_ref{ .offset = 0, .size = push_constants_size } - ); - result.push_back(input); - } - + std::vector> result; + result.emplace_back(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, ssbo_count); return result; } + void compute_task::init_descriptors() + { + rsx::simple_array descriptor_pool_sizes; + rsx::simple_array bindings; + + const auto layout = get_descriptor_layout(); + for (const auto &e : layout) + { + descriptor_pool_sizes.push_back({e.first, e.second}); + + for (unsigned n = 0; n < e.second; ++n) + { + bindings.push_back + ({ + u32(bindings.size()), + e.first, + 1, + VK_SHADER_STAGE_COMPUTE_BIT, + nullptr + }); + } + } + + // Reserve descriptor pools + m_descriptor_pool.create(*g_render_device, descriptor_pool_sizes); + m_descriptor_layout = vk::descriptors::create_layout(bindings); + + VkPipelineLayoutCreateInfo layout_info = {}; + layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + 
layout_info.setLayoutCount = 1; + layout_info.pSetLayouts = &m_descriptor_layout; + + VkPushConstantRange push_constants{}; + if (use_push_constants) + { + push_constants.size = push_constants_size; + push_constants.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + + layout_info.pushConstantRangeCount = 1; + layout_info.pPushConstantRanges = &push_constants; + } + + CHECK_RESULT(vkCreatePipelineLayout(*g_render_device, &layout_info, nullptr, &m_pipeline_layout)); + } + void compute_task::create() { if (!initialized) { + init_descriptors(); + switch (vk::get_driver_vendor()) { case vk::driver_vendor::unknown: @@ -100,6 +121,10 @@ namespace vk m_program.reset(); m_param_buffer.reset(); + vkDestroyDescriptorSetLayout(*g_render_device, m_descriptor_layout, nullptr); + vkDestroyPipelineLayout(*g_render_device, m_pipeline_layout, nullptr); + m_descriptor_pool.destroy(); + initialized = false; } } @@ -117,23 +142,26 @@ namespace vk shader_stage.module = handle; shader_stage.pName = "main"; - VkComputePipelineCreateInfo create_info - { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = handle, - .pName = "main" - }, - }; + VkComputePipelineCreateInfo info{}; + info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + info.stage = shader_stage; + info.layout = m_pipeline_layout; + info.basePipelineIndex = -1; + info.basePipelineHandle = VK_NULL_HANDLE; auto compiler = vk::get_pipe_compiler(); - m_program = compiler->compile(create_info, vk::pipe_compiler::COMPILE_INLINE, {}, get_inputs()); + m_program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE); + declare_inputs(); } - bind_resources(cmd); - m_program->bind(cmd, VK_PIPELINE_BIND_POINT_COMPUTE); + ensure(m_used_descriptors < VK_MAX_COMPUTE_TASKS); + + m_descriptor_set = m_descriptor_pool.allocate(m_descriptor_layout, VK_TRUE); + + bind_resources(); + + 
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_program->pipeline); + m_descriptor_set.bind(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline_layout); } void compute_task::run(const vk::command_buffer& cmd, u32 invocations_x, u32 invocations_y, u32 invocations_z) @@ -243,19 +271,15 @@ namespace vk m_src += suffix; } - void cs_shuffle_base::bind_resources(const vk::command_buffer& cmd) + void cs_shuffle_base::bind_resources() { - set_parameters(cmd); - m_program->bind_uniform({ m_data->value, m_data_offset, m_data_length }, 0, 0); + m_program->bind_buffer({ m_data->value, m_data_offset, m_data_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); } - void cs_shuffle_base::set_parameters(const vk::command_buffer& cmd) + void cs_shuffle_base::set_parameters(const vk::command_buffer& cmd, const u32* params, u8 count) { - if (!m_params.empty()) - { - ensure(use_push_constants); - vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, m_params.size_bytes32(), m_params.data()); - } + ensure(use_push_constants); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, count * 4, params); } void cs_shuffle_base::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_length, u32 data_offset) @@ -293,15 +317,15 @@ namespace vk " uint stencil_offset;\n"; } - void cs_interleave_task::bind_resources(const vk::command_buffer& cmd) + void cs_interleave_task::bind_resources() { - set_parameters(cmd); - m_program->bind_uniform({ m_data->value, m_data_offset, m_ssbo_length }, 0, 0); + m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); } void cs_interleave_task::run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset) { - m_params = { data_length, zeta_offset - data_offset, stencil_offset - data_offset, 0 }; + u32 parameters[4] = { data_length, zeta_offset - 
data_offset, stencil_offset - data_offset, 0 }; + set_parameters(cmd, parameters, 4); ensure(stencil_offset > data_offset); m_ssbo_length = stencil_offset + (data_length / 4) - data_offset; @@ -353,10 +377,10 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void cs_aggregator::bind_resources(const vk::command_buffer& /*cmd*/) + void cs_aggregator::bind_resources() { - m_program->bind_uniform({ src->value, 0, block_length }, 0, 0); - m_program->bind_uniform({ dst->value, 0, 4 }, 0, 1); + m_program->bind_buffer({ src->value, 0, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ dst->value, 0, 4 }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); } void cs_aggregator::run(const vk::command_buffer& cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words) diff --git a/rpcs3/Emu/RSX/VK/VKCompute.h b/rpcs3/Emu/RSX/VK/VKCompute.h index 5d20a60391..4f9a3f2a3a 100644 --- a/rpcs3/Emu/RSX/VK/VKCompute.h +++ b/rpcs3/Emu/RSX/VK/VKCompute.h @@ -19,6 +19,12 @@ namespace vk std::unique_ptr m_program; std::unique_ptr m_param_buffer; + vk::descriptor_pool m_descriptor_pool; + descriptor_set m_descriptor_set; + VkDescriptorSetLayout m_descriptor_layout = nullptr; + VkPipelineLayout m_pipeline_layout = nullptr; + u32 m_used_descriptors = 0; + bool initialized = false; bool unroll_loops = true; bool use_push_constants = false; @@ -31,11 +37,15 @@ namespace vk compute_task() = default; virtual ~compute_task() { destroy(); } + virtual std::vector> get_descriptor_layout(); + + void init_descriptors(); + void create(); void destroy(); - virtual std::vector get_inputs(); - virtual void bind_resources(const vk::command_buffer& /*cmd*/) {} + virtual void bind_resources() {} + virtual void declare_inputs() {} void load_program(const vk::command_buffer& cmd); @@ -50,8 +60,6 @@ namespace vk u32 m_data_length = 0; u32 kernel_size = 1; - rsx::simple_array m_params; - std::string variables, work_kernel, 
loop_advance, suffix; std::string method_declarations; @@ -59,9 +67,9 @@ namespace vk void build(const char* function_name, u32 _kernel_size = 0); - void bind_resources(const vk::command_buffer& cmd) override; + void bind_resources() override; - void set_parameters(const vk::command_buffer& cmd); + void set_parameters(const vk::command_buffer& cmd, const u32* params, u8 count); void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_length, u32 data_offset = 0); }; @@ -127,7 +135,7 @@ namespace vk cs_interleave_task(); - void bind_resources(const vk::command_buffer& cmd) override; + void bind_resources() override; void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 data_offset, u32 data_length, u32 zeta_offset, u32 stencil_offset); }; @@ -344,10 +352,9 @@ namespace vk cs_shuffle_base::build(""); } - void bind_resources(const vk::command_buffer& cmd) override + void bind_resources() override { - set_parameters(cmd); - m_program->bind_uniform({ m_data->value, m_data_offset, m_ssbo_length }, 0, 0); + m_program->bind_buffer({ m_data->value, m_data_offset, m_ssbo_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); } void run(const vk::command_buffer& cmd, const vk::buffer* data, u32 src_offset, u32 src_length, u32 dst_offset) @@ -364,7 +371,8 @@ namespace vk data_offset = src_offset; } - m_params = { src_length, src_offset - data_offset, dst_offset - data_offset, 0 }; + u32 parameters[4] = { src_length, src_offset - data_offset, dst_offset - data_offset, 0 }; + set_parameters(cmd, parameters, 4); cs_shuffle_base::run(cmd, data, src_length, data_offset); } }; @@ -445,17 +453,15 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void bind_resources(const vk::command_buffer& cmd) override + void bind_resources() override { - set_parameters(cmd); - - m_program->bind_uniform({ src_buffer->value, in_offset, block_length }, 0, 0); - m_program->bind_uniform({ dst_buffer->value, out_offset, block_length }, 0, 1); 
+ m_program->bind_buffer({ src_buffer->value, in_offset, block_length }, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ dst_buffer->value, out_offset, block_length }, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); } void set_parameters(const vk::command_buffer& cmd) { - vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, params.data); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, params.data); } void run(const vk::command_buffer& cmd, const vk::buffer* dst, u32 out_offset, const vk::buffer* src, u32 in_offset, u32 data_length, u32 width, u32 height, u32 depth, u32 mipmaps) override @@ -474,6 +480,7 @@ namespace vk params.logw = rsx::ceil_log2(width); params.logh = rsx::ceil_log2(height); params.logd = rsx::ceil_log2(depth); + set_parameters(cmd); const u32 num_bytes_per_invocation = (sizeof(_BlockType) * optimal_group_size); const u32 linear_invocations = utils::aligned_div(data_length, num_bytes_per_invocation); @@ -490,7 +497,7 @@ namespace vk cs_aggregator(); - void bind_resources(const vk::command_buffer& cmd) override; + void bind_resources() override; void run(const vk::command_buffer& cmd, const vk::buffer* dst, const vk::buffer* src, u32 num_words); }; @@ -574,18 +581,16 @@ namespace vk m_src = fmt::replace_all(m_src, syntax_replace); } - void bind_resources(const vk::command_buffer& cmd) override + void bind_resources() override { - set_parameters(cmd); - - const auto op = static_cast(Op); - m_program->bind_uniform({ src_buffer->value, in_offset, in_block_length }, 0u, 0u ^ op); - m_program->bind_uniform({ dst_buffer->value, out_offset, out_block_length }, 0u, 1u ^ op); + const auto op = static_cast(Op); + m_program->bind_buffer({ src_buffer->value, in_offset, in_block_length }, 0 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); + m_program->bind_buffer({ dst_buffer->value, out_offset, out_block_length 
}, 1 ^ op, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_descriptor_set); } void set_parameters(const vk::command_buffer& cmd) { - vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, &params); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, &params); } void run(const vk::command_buffer& cmd, const RSX_detiler_config& config) @@ -648,6 +653,7 @@ namespace vk params.image_height = (Op == RSX_detiler_op::decode) ? tile_aligned_height : config.image_height; params.image_pitch = config.image_pitch; params.image_bpp = config.image_bpp; + set_parameters(cmd); const u32 subtexels_per_invocation = (config.image_bpp < 4) ? (4 / config.image_bpp) : 1; const u32 virtual_width = config.image_width / subtexels_per_invocation; diff --git a/rpcs3/Emu/RSX/VK/VKDraw.cpp b/rpcs3/Emu/RSX/VK/VKDraw.cpp index 2a6d195bef..3b760f103f 100644 --- a/rpcs3/Emu/RSX/VK/VKDraw.cpp +++ b/rpcs3/Emu/RSX/VK/VKDraw.cpp @@ -554,8 +554,9 @@ bool VKGSRender::bind_texture_env() if (view) [[likely]] { m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, - vk::glsl::binding_set_index_fragment, - m_fs_binding_table->ftex_location[i]); + i, + ::glsl::program_domain::glsl_fragment_program, + m_current_frame->descriptor_set); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { @@ -575,22 +576,27 @@ bool VKGSRender::bind_texture_env() } m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, - vk::glsl::binding_set_index_fragment, - m_fs_binding_table->ftex_stencil_location[i]); + i, + ::glsl::program_domain::glsl_fragment_program, + m_current_frame->descriptor_set, + true); } } else { const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, 
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - vk::glsl::binding_set_index_fragment, - m_fs_binding_table->ftex_location[i]); + i, + ::glsl::program_domain::glsl_fragment_program, + m_current_frame->descriptor_set); if (current_fragment_program.texture_state.redirected_textures & (1 << i)) { m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - vk::glsl::binding_set_index_fragment, - m_fs_binding_table->ftex_stencil_location[i]); + i, + ::glsl::program_domain::glsl_fragment_program, + m_current_frame->descriptor_set, + true); } } } @@ -604,8 +610,9 @@ bool VKGSRender::bind_texture_env() { const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - vk::glsl::binding_set_index_vertex, - m_vs_binding_table->vtex_location[i]); + i, + ::glsl::program_domain::glsl_vertex_program, + m_current_frame->descriptor_set); continue; } @@ -627,8 +634,9 @@ bool VKGSRender::bind_texture_env() const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - vk::glsl::binding_set_index_vertex, - m_vs_binding_table->vtex_location[i]); + i, + ::glsl::program_domain::glsl_vertex_program, + m_current_frame->descriptor_set); continue; } @@ -636,8 +644,9 @@ bool VKGSRender::bind_texture_env() validate_image_layout_for_read_access(*m_current_command_buffer, image_ptr, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, sampler_state); m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, - vk::glsl::binding_set_index_vertex, - m_vs_binding_table->vtex_location[i]); + i, + 
::glsl::program_domain::glsl_vertex_program, + m_current_frame->descriptor_set); } return out_of_memory; @@ -712,7 +721,7 @@ bool VKGSRender::bind_interpreter_texture_env() } } - m_shader_interpreter.update_fragment_textures(texture_env); + m_shader_interpreter.update_fragment_textures(texture_env, m_current_frame->descriptor_set); return out_of_memory; } @@ -761,6 +770,9 @@ void VKGSRender::emit_geometry(u32 sub_index) return; } + const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; + const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; + // Programs data is dependent on vertex state auto upload_info = upload_vertex_data(); if (!upload_info.vertex_draw_count) @@ -815,6 +827,8 @@ void VKGSRender::emit_geometry(u32 sub_index) auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; bool update_descriptors = false; + const auto& binding_table = m_device->get_pipeline_binding_table(); + if (m_current_draw.subdraw_id == 0) { update_descriptors = true; @@ -834,6 +848,33 @@ void VKGSRender::emit_geometry(u32 sub_index) vk::clear_status_interrupt(vk::heap_changed); } } + else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) + { + // Need to update descriptors; make a copy for the next draw + VkDescriptorSet previous_set = m_current_frame->descriptor_set.value(); + m_current_frame->descriptor_set.flush(); + m_current_frame->descriptor_set = allocate_descriptor_set(); + rsx::simple_array copy_cmds(binding_table.total_descriptor_bindings); + + for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n) + { + copy_cmds[n] = + { + VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, // sType + nullptr, // pNext + previous_set, // srcSet + n, // srcBinding + 0u, // srcArrayElement + m_current_frame->descriptor_set.value(), // dstSet + 
n, // dstBinding + 0u, // dstArrayElement + 1u // descriptorCount + }; + } + + m_current_frame->descriptor_set.push(copy_cmds); + update_descriptors = true; + } // Update vertex fetch parameters update_vertex_env(sub_index, upload_info); @@ -841,9 +882,9 @@ void VKGSRender::emit_geometry(u32 sub_index) ensure(m_vertex_layout_storage); if (update_descriptors) { - m_program->bind_uniform(persistent_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location); - m_program->bind_uniform(volatile_buffer, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 1); - m_program->bind_uniform(m_vertex_layout_storage->value, vk::glsl::binding_set_index_vertex, m_vs_binding_table->vertex_buffers_location + 2); + m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set); + m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); } bool reload_state = (!m_current_draw.subdraw_id++); @@ -867,12 +908,10 @@ void VKGSRender::emit_geometry(u32 sub_index) reload_state = true; }); - // Bind both pipe and descriptors in one go - // FIXME: We only need to rebind the pipeline when reload state is set. Flags? 
- m_program->bind(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS); - if (reload_state) { + vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); + update_draw_state(); begin_render_pass(); @@ -890,6 +929,7 @@ void VKGSRender::emit_geometry(u32 sub_index) } // Bind the new set of descriptors for use with this draw call + m_current_frame->descriptor_set.bind(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline_layout); m_frame_stats.setup_time += m_profiler.duration(); if (!upload_info.index_info) @@ -1043,6 +1083,9 @@ void VKGSRender::end() return; } + // Allocate descriptor set + m_current_frame->descriptor_set = allocate_descriptor_set(); + // Load program execution environment load_program_env(); m_frame_stats.setup_time += m_profiler.duration(); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp index 040d528218..dd654a6736 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp @@ -26,85 +26,8 @@ std::string VKFragmentDecompilerThread::compareFunction(COMPARE f, const std::st return glsl::compareFunctionImpl(f, Op0, Op1); } -void VKFragmentDecompilerThread::prepareBindingTable() -{ - // First check if we have constants and textures as those need extra work - bool has_constants = false, has_textures = false; - for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) - { - if (has_constants && has_textures) - { - break; - } - - if (PT.type.starts_with("sampler")) - { - has_textures = true; - continue; - } - - ensure(PT.type.starts_with("vec")); - has_constants = true; - } - - unsigned location = 0; // All bindings must be set from this var - vk_prog->binding_table.context_buffer_location = location++; - if (has_constants) - { - vk_prog->binding_table.cbuf_location = location++; - } - - vk_prog->binding_table.tex_param_location = location++; - 
vk_prog->binding_table.polygon_stipple_params_location = location++; - - std::memset(vk_prog->binding_table.ftex_location, 0xff, sizeof(vk_prog->binding_table.ftex_location)); - std::memset(vk_prog->binding_table.ftex_stencil_location, 0xff, sizeof(vk_prog->binding_table.ftex_stencil_location)); - - if (has_textures) [[ likely ]] - { - for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) - { - if (!PT.type.starts_with("sampler")) - { - continue; - } - - for (const ParamItem& PI : PT.items) - { - const auto texture_id = vk::get_texture_index(PI.name); - const auto mask = 1u << texture_id; - - // Allocate real binding - vk_prog->binding_table.ftex_location[texture_id] = location++; - - // Tag the stencil mirror if required - if (properties.redirected_sampler_mask & mask) [[ unlikely ]] - { - vk_prog->binding_table.ftex_stencil_location[texture_id] = 0; - } - } - - // Normalize stencil offsets - if (properties.redirected_sampler_mask != 0) [[ unlikely ]] - { - for (auto& stencil_location : vk_prog->binding_table.ftex_stencil_location) - { - if (stencil_location != 0) - { - continue; - } - - stencil_location = location++; - } - } - } - } -} - void VKFragmentDecompilerThread::insertHeader(std::stringstream & OS) { - prepareBindingTable(); - std::vector required_extensions; if (device_props.has_native_half_support) @@ -174,18 +97,21 @@ void VKFragmentDecompilerThread::insertOutputs(std::stringstream & OS) void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) { + u32 location = m_binding_table.textures_first_bind_slot; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (!PT.type.starts_with("sampler")) - { + if (PT.type != "sampler1D" && + PT.type != "sampler2D" && + PT.type != "sampler3D" && + PT.type != "samplerCube") continue; - } for (const ParamItem& PI : PT.items) { std::string samplerType = PT.type; - const int index = vk::get_texture_index(PI.name); + ensure(PI.name.length() > 3); + int index = atoi(&PI.name[3]); const 
auto mask = (1 << index); if (properties.multisampled_sampler_mask & mask) @@ -209,37 +135,39 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) } } - const int id = vk::get_texture_index(PI.name); - auto in = vk::glsl::program_input::make( - glsl::glsl_fragment_program, - PI.name, - vk::glsl::input_type_texture, - vk::glsl::binding_set_index_fragment, - vk_prog->binding_table.ftex_location[id] - ); + vk::glsl::program_input in; + in.location = location; + in.domain = glsl::glsl_fragment_program; + in.name = PI.name; + in.type = vk::glsl::input_type_texture; + inputs.push_back(in); - OS << "layout(set=1, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set=0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n"; if (properties.redirected_sampler_mask & mask) { // Insert stencil mirror declaration in.name += "_stencil"; - in.location = vk_prog->binding_table.ftex_stencil_location[id]; + in.location = location; + inputs.push_back(in); - OS << "layout(set=1, binding=" << in.location << ") uniform u" << samplerType << " " << in.name << ";\n"; + OS << "layout(set=0, binding=" << location++ << ") uniform u" << samplerType << " " << in.name << ";\n"; } } } + ensure(location <= m_binding_table.vertex_textures_first_bind_slot); // "Too many sampler descriptors!" 
+ std::string constants_block; for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) { - if (PT.type.starts_with("sampler")) - { + if (PT.type == "sampler1D" || + PT.type == "sampler2D" || + PT.type == "sampler3D" || + PT.type == "samplerCube") continue; - } for (const ParamItem& PI : PT.items) { @@ -249,13 +177,13 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) if (!constants_block.empty()) { - OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.cbuf_location << ") uniform FragmentConstantsBuffer\n"; + OS << "layout(std140, set = 0, binding = 2) uniform FragmentConstantsBuffer\n"; OS << "{\n"; OS << constants_block; OS << "};\n\n"; } - OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform FragmentStateBuffer\n"; + OS << "layout(std140, set = 0, binding = 3) uniform FragmentStateBuffer\n"; OS << "{\n"; OS << " float fog_param0;\n"; OS << " float fog_param1;\n"; @@ -267,39 +195,32 @@ void VKFragmentDecompilerThread::insertConstants(std::stringstream & OS) OS << " float wpos_bias;\n"; OS << "};\n\n"; - OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.tex_param_location << ") uniform TextureParametersBuffer\n"; + OS << "layout(std140, set = 0, binding = 4) uniform TextureParametersBuffer\n"; OS << "{\n"; OS << " sampler_info texture_parameters[16];\n"; OS << "};\n\n"; - OS << "layout(std140, set=1, binding=" << vk_prog->binding_table.polygon_stipple_params_location << ") uniform RasterizerHeap\n"; + OS << "layout(std140, set = 0, binding = " << std::to_string(m_binding_table.rasterizer_env_bind_slot) << ") uniform RasterizerHeap\n"; OS << "{\n"; OS << " uvec4 stipple_pattern[8];\n"; OS << "};\n\n"; - vk::glsl::program_input in - { - .domain = glsl::glsl_fragment_program, - .type = vk::glsl::input_type_uniform_buffer, - .set = vk::glsl::binding_set_index_fragment - }; + vk::glsl::program_input in; + in.location = 
m_binding_table.fragment_constant_buffers_bind_slot; + in.domain = glsl::glsl_fragment_program; + in.name = "FragmentConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + inputs.push_back(in); - if (!constants_block.empty()) - { - in.location = vk_prog->binding_table.cbuf_location; - in.name = "FragmentConstantsBuffer"; - inputs.push_back(in); - } - - in.location = vk_prog->binding_table.context_buffer_location; + in.location = m_binding_table.fragment_state_bind_slot; in.name = "FragmentStateBuffer"; inputs.push_back(in); - in.location = vk_prog->binding_table.tex_param_location; + in.location = m_binding_table.fragment_texture_params_bind_slot; in.name = "TextureParametersBuffer"; inputs.push_back(in); - in.location = vk_prog->binding_table.polygon_stipple_params_location; + in.location = m_binding_table.rasterizer_env_bind_slot; in.name = "RasterizerHeap"; inputs.push_back(in); } @@ -451,6 +372,7 @@ void VKFragmentDecompilerThread::insertMainEnd(std::stringstream & OS) void VKFragmentDecompilerThread::Task() { + m_binding_table = vk::g_render_device->get_pipeline_binding_table(); m_shader = Decompile(); vk_prog->SetInputs(inputs); } @@ -484,7 +406,10 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog) { for (const ParamItem& PI : PT.items) { - if (PT.type.starts_with("sampler")) + if (PT.type == "sampler1D" || + PT.type == "sampler2D" || + PT.type == "sampler3D" || + PT.type == "samplerCube") continue; usz offset = atoi(PI.name.c_str() + 2); diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h index c51b81b8fc..787f38ec05 100644 --- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.h +++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.h @@ -10,7 +10,7 @@ namespace vk class shader_interpreter; } -class VKFragmentDecompilerThread : public FragmentProgramDecompiler +struct VKFragmentDecompilerThread : public FragmentProgramDecompiler { friend class vk::shader_interpreter; @@ -19,8 +19,7 @@ class VKFragmentDecompilerThread 
: public FragmentProgramDecompiler std::vector inputs; class VKFragmentProgram *vk_prog; glsl::shader_properties m_shader_props{}; - - void prepareBindingTable(); + vk::pipeline_binding_table m_binding_table{}; public: VKFragmentDecompilerThread(std::string& shader, ParamArray& parr, const RSXFragmentProgram &prog, u32& size, class VKFragmentProgram& dst) @@ -33,7 +32,6 @@ public: void Task(); const std::vector& get_inputs() { return inputs; } - protected: std::string getFloatTypeName(usz elementCount) override; std::string getHalfTypeName(usz elementCount) override; @@ -65,19 +63,8 @@ public: std::vector FragmentConstantOffsetCache; std::array output_color_masks{ {} }; + std::vector uniforms; - - struct - { - u32 context_buffer_location = umax; // Rasterizer context - u32 cbuf_location = umax; // Constants register file - u32 tex_param_location = umax; // Texture configuration data - u32 polygon_stipple_params_location = umax; // Polygon stipple settings - u32 ftex_location[16]; // Texture locations array - u32 ftex_stencil_location[16]; // Texture stencil mirror array - - } binding_table; - void SetInputs(std::vector& inputs); /** * Decompile a fragment shader located in the PS3's Memory. This function operates synchronously. 
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index b2003f645c..107cd7b399 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1,4 +1,3 @@ -#include "Emu/RSX/VK/vkutils/descriptors.h" #include "stdafx.h" #include "../Overlays/overlay_compile_notification.h" #include "../Overlays/Shaders/shader_loading_dialog_native.h" @@ -424,8 +423,8 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) std::vector& gpus = m_instance.enumerate_devices(); - // Actually confirm that the loader found at least one compatible device - // This should not happen unless something is wrong with the driver setup on the target system + //Actually confirm that the loader found at least one compatible device + //This should not happen unless something is wrong with the driver setup on the target system if (gpus.empty()) { //We can't throw in Emulator::Load, so we show error and return @@ -483,16 +482,20 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) swapchain_unavailable = true; } - // create command buffer... + //create command buffer... 
m_command_buffer_pool.create((*m_device), m_device->get_graphics_queue_family()); m_primary_cb_list.create(m_command_buffer_pool, vk::command_buffer::access_type_hint::flush_only); m_current_command_buffer = m_primary_cb_list.get(); m_current_command_buffer->begin(); - // Create secondary command_buffer for parallel operations + //Create secondary command_buffer for parallel operations m_secondary_command_buffer_pool.create((*m_device), m_device->get_graphics_queue_family()); m_secondary_cb_list.create(m_secondary_command_buffer_pool, vk::command_buffer::access_type_hint::all); + //Precalculated stuff + rsx::simple_array binding_layout; + std::tie(m_pipeline_layout, m_descriptor_layouts, binding_layout) = vk::get_common_pipeline_layout(*m_device); + //Occlusion m_occlusion_query_manager = std::make_unique(*m_device, VK_QUERY_TYPE_OCCLUSION, OCCLUSION_MAX_POOL_SIZE); m_occlusion_map.resize(rsx::reports::occlusion_query_count); @@ -505,6 +508,11 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar) m_occlusion_query_manager->set_control_flags(VK_QUERY_CONTROL_PRECISE_BIT, 0); } + // Generate frame contexts + const u32 max_draw_calls = m_device->get_descriptor_max_draw_calls(); + const auto descriptor_type_sizes = vk::get_descriptor_pool_sizes(binding_layout); + m_descriptor_pool.create(*m_device, descriptor_type_sizes, max_draw_calls); + VkSemaphoreCreateInfo semaphore_info = {}; semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; @@ -844,6 +852,12 @@ VKGSRender::~VKGSRender() m_stencil_mirror_sampler.reset(); + // Pipeline descriptors + m_descriptor_pool.destroy(); + + vkDestroyPipelineLayout(*m_device, m_pipeline_layout, nullptr); + vkDestroyDescriptorSetLayout(*m_device, m_descriptor_layouts, nullptr); + // Queries m_occlusion_query_manager.reset(); m_cond_render_buffer.reset(); @@ -855,9 +869,6 @@ VKGSRender::~VKGSRender() m_command_buffer_pool.destroy(); m_secondary_command_buffer_pool.destroy(); - // Descriptors - 
vk::descriptors::flush(); - // Global resources vk::destroy_global_resources(); @@ -1146,6 +1157,18 @@ void VKGSRender::check_present_status() } } +VkDescriptorSet VKGSRender::allocate_descriptor_set() +{ + if (!m_shader_interpreter.is_interpreter(m_program)) [[likely]] + { + return m_descriptor_pool.allocate(m_descriptor_layouts, VK_TRUE); + } + else + { + return m_shader_interpreter.allocate_descriptor_set(); + } +} + void VKGSRender::set_viewport() { const auto [clip_width, clip_height] = rsx::apply_resolution_scale( @@ -1219,7 +1242,7 @@ void VKGSRender::on_init_thread() if (!m_overlay_manager) { m_frame->hide(); - m_shaders_cache->load(nullptr); + m_shaders_cache->load(nullptr, m_pipeline_layout); m_frame->show(); } else @@ -1227,7 +1250,7 @@ void VKGSRender::on_init_thread() rsx::shader_loading_dialog_native dlg(this); // TODO: Handle window resize messages during loading on GPUs without OUT_OF_DATE_KHR support - m_shaders_cache->load(&dlg); + m_shaders_cache->load(&dlg, m_pipeline_layout); } } @@ -1780,11 +1803,8 @@ bool VKGSRender::load_program() m_program = m_shader_interpreter.get( m_pipeline_properties, current_fp_metadata, - current_vp_metadata, current_vertex_program.ctrl, current_fragment_program.ctrl); - - std::tie(m_vs_binding_table, m_fs_binding_table) = get_binding_table(); return true; } } @@ -1850,7 +1870,7 @@ bool VKGSRender::load_program() vertex_program, fragment_program, m_pipeline_properties, - shadermode != shader_mode::recompiler, true); + shadermode != shader_mode::recompiler, true, m_pipeline_layout); vk::leave_uninterruptible(); @@ -1882,7 +1902,6 @@ bool VKGSRender::load_program() m_program = m_shader_interpreter.get( m_pipeline_properties, current_fp_metadata, - current_vp_metadata, current_vertex_program.ctrl, current_fragment_program.ctrl); @@ -1904,16 +1923,6 @@ bool VKGSRender::load_program() } } - if (m_program) - { - std::tie(m_vs_binding_table, m_fs_binding_table) = get_binding_table(); - } - else - { - m_vs_binding_table = 
nullptr; - m_fs_binding_table = nullptr; - } - return m_program != nullptr; } @@ -1925,14 +1934,13 @@ void VKGSRender::load_program_env() } const u32 fragment_constants_size = current_fp_metadata.program_constants_buffer_length; - const bool is_interpreter = m_shader_interpreter.is_interpreter(m_program); const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty); const bool update_fragment_constants = !!(m_graphics_state & rsx::pipeline_state::fragment_constants_dirty); const bool update_vertex_env = !!(m_graphics_state & rsx::pipeline_state::vertex_state_dirty); const bool update_fragment_env = !!(m_graphics_state & rsx::pipeline_state::fragment_state_dirty); const bool update_fragment_texture_env = !!(m_graphics_state & rsx::pipeline_state::fragment_texture_state_dirty); - const bool update_instruction_buffers = (!!m_interpreter_state && is_interpreter); + const bool update_instruction_buffers = (!!m_interpreter_state && m_shader_interpreter.is_interpreter(m_program)); const bool update_raster_env = (rsx::method_registers.polygon_stipple_enabled() && !!(m_graphics_state & rsx::pipeline_state::polygon_stipple_pattern_dirty)); const bool update_instancing_data = rsx::method_registers.current_draw_clause.is_trivial_instanced_draw; @@ -2093,36 +2101,34 @@ void VKGSRender::load_program_env() } } - m_program->bind_uniform(m_vertex_env_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->context_buffer_location); - m_program->bind_uniform(m_fragment_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->context_buffer_location); - m_program->bind_uniform(m_fragment_texture_params_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->tex_param_location); - m_program->bind_uniform(m_raster_env_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->polygon_stipple_params_location); + const auto& binding_table = m_device->get_pipeline_binding_table(); - 
if (m_vs_binding_table->cbuf_location != umax) - { - m_program->bind_uniform(m_vertex_constants_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cbuf_location); - } + m_program->bind_uniform(m_vertex_env_buffer_info, binding_table.vertex_params_bind_slot, m_current_frame->descriptor_set); + m_program->bind_buffer(m_vertex_constants_buffer_info, binding_table.vertex_constant_buffers_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_env_buffer_info, binding_table.fragment_state_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_fragment_texture_params_buffer_info, binding_table.fragment_texture_params_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(m_raster_env_buffer_info, binding_table.rasterizer_env_bind_slot, m_current_frame->descriptor_set); - if (m_shader_interpreter.is_interpreter(m_program)) + if (!m_shader_interpreter.is_interpreter(m_program)) { - m_program->bind_uniform(m_vertex_instructions_buffer_info, vk::glsl::binding_set_index_vertex, m_shader_interpreter.get_vertex_instruction_location()); - m_program->bind_uniform(m_fragment_instructions_buffer_info, vk::glsl::binding_set_index_fragment, m_shader_interpreter.get_fragment_instruction_location()); + m_program->bind_uniform(m_fragment_constants_buffer_info, binding_table.fragment_constant_buffers_bind_slot, m_current_frame->descriptor_set); } - else if (m_fs_binding_table->cbuf_location != umax) + else { - m_program->bind_uniform(m_fragment_constants_buffer_info, vk::glsl::binding_set_index_fragment, m_fs_binding_table->cbuf_location); + m_program->bind_buffer(m_vertex_instructions_buffer_info, m_shader_interpreter.get_vertex_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_buffer(m_fragment_instructions_buffer_info, m_shader_interpreter.get_fragment_instruction_location(), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 
m_current_frame->descriptor_set); } if (vk::emulate_conditional_rendering()) { auto predicate = m_cond_render_buffer ? m_cond_render_buffer->value : vk::get_scratch_buffer(*m_current_command_buffer, 4)->value; - m_program->bind_uniform({ predicate, 0, 4 }, vk::glsl::binding_set_index_vertex, m_vs_binding_table->cr_pred_buffer_location); + m_program->bind_buffer({ predicate, 0, 4 }, binding_table.conditional_render_predicate_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); } if (current_vertex_program.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) { - m_program->bind_uniform(m_instancing_indirection_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->instanced_lut_buffer_location); - m_program->bind_uniform(m_instancing_constants_array_buffer_info, vk::glsl::binding_set_index_vertex, m_vs_binding_table->instanced_cbuf_location); + m_program->bind_buffer(m_instancing_indirection_buffer_info, binding_table.instancing_lookup_table_bind_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); + m_program->bind_buffer(m_instancing_constants_array_buffer_info, binding_table.instancing_constants_buffer_slot, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, m_current_frame->descriptor_set); } // Clear flags @@ -2149,19 +2155,6 @@ void VKGSRender::load_program_env() m_graphics_state.clear(handled_flags); } -std::pair VKGSRender::get_binding_table() const -{ - ensure(m_program); - - if (!m_shader_interpreter.is_interpreter(m_program)) - { - return { &m_vertex_prog->binding_table, &m_fragment_prog->binding_table }; - } - - const auto& [vs, fs] = m_shader_interpreter.get_shaders(); - return { &vs->binding_table, &fs->binding_table }; -} - bool VKGSRender::is_current_program_interpreted() const { return m_program && m_shader_interpreter.is_interpreter(m_program); @@ -2222,7 +2215,7 @@ void VKGSRender::update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_ vkCmdPushConstants( *m_current_command_buffer, - m_program->layout(), 
+ m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, data_length, diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index 107da8ebf2..e16d8d1afa 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -26,9 +26,6 @@ using namespace vk::vmm_allocation_pool_; // clang workaround. using namespace vk::upscaling_flags_; // ditto -using vs_binding_table_t = decltype(VKVertexProgram::binding_table); -using fs_binding_table_t = decltype(VKFragmentProgram::binding_table); - namespace vk { using host_data_t = rsx::host_gpu_context_t; @@ -56,9 +53,6 @@ private: vk::glsl::program *m_prev_program = nullptr; vk::pipeline_props m_pipeline_properties; - const vs_binding_table_t* m_vs_binding_table = nullptr; - const fs_binding_table_t* m_fs_binding_table = nullptr; - vk::texture_cache m_texture_cache; vk::surface_cache m_rtts; @@ -84,8 +78,6 @@ private: VkDependencyInfoKHR m_async_compute_dependency_info {}; VkMemoryBarrier2KHR m_async_compute_memory_barrier {}; - std::pair get_binding_table() const; - public: //vk::fbo draw_fbo; std::unique_ptr m_vertex_cache; @@ -113,6 +105,11 @@ private: vk::command_buffer_chunk* m_current_command_buffer = nullptr; std::unique_ptr m_host_object_data; + + vk::descriptor_pool m_descriptor_pool; + VkDescriptorSetLayout m_descriptor_layouts = VK_NULL_HANDLE; + VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; + vk::framebuffer_holder* m_draw_fbo = nullptr; sizeu m_swapchain_dims{}; @@ -223,6 +220,8 @@ private: void update_draw_state(); void check_present_status(); + VkDescriptorSet allocate_descriptor_set(); + vk::vertex_upload_info upload_vertex_data(); rsx::simple_array m_scratch_mem; diff --git a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp index 8f38378f52..acd4c42cb2 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp +++ b/rpcs3/Emu/RSX/VK/VKGSRenderTypes.hpp @@ -178,6 +178,8 @@ namespace vk VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE; VkSemaphore 
present_wait_semaphore = VK_NULL_HANDLE; + vk::descriptor_set descriptor_set; + rsx::flags32_t flags = 0; u32 present_image = -1; @@ -191,6 +193,7 @@ namespace vk { present_wait_semaphore = other.present_wait_semaphore; acquire_signal_semaphore = other.acquire_signal_semaphore; + descriptor_set.swap(other.descriptor_set); flags = other.flags; heap_snapshot = other.heap_snapshot; } diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.cpp b/rpcs3/Emu/RSX/VK/VKOverlays.cpp index 34ca64ca66..f4e7d092f9 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.cpp +++ b/rpcs3/Emu/RSX/VK/VKOverlays.cpp @@ -43,46 +43,106 @@ namespace vk if (!m_vao.heap) { m_vao.create(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, 1 * 0x100000, "overlays VAO", 128); - } - - if (!m_ubo.heap && m_num_uniform_buffers > 0) - { m_ubo.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 8 * 0x100000, "overlays UBO", 128); } } + void overlay_pass::init_descriptors() + { + rsx::simple_array descriptor_pool_sizes = {}; + + if (m_num_uniform_buffers) + { + descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, m_num_uniform_buffers }); + }; + + if (m_num_usable_samplers) + { + descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_num_usable_samplers }); + } + + if (m_num_input_attachments) + { + descriptor_pool_sizes.push_back({ VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, m_num_input_attachments }); + } + + // Reserve descriptor pools + m_descriptor_pool.create(*m_device, descriptor_pool_sizes); + + const auto num_bindings = m_num_uniform_buffers + m_num_usable_samplers + m_num_input_attachments; + rsx::simple_array bindings(num_bindings); + u32 binding_slot = 0; + + for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding_slot) + { + bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + bindings[binding_slot].descriptorCount = 1; + bindings[binding_slot].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[binding_slot].binding = binding_slot; + 
bindings[binding_slot].pImmutableSamplers = nullptr; + } + + for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding_slot) + { + bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[binding_slot].descriptorCount = 1; + bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[binding_slot].binding = binding_slot; + bindings[binding_slot].pImmutableSamplers = nullptr; + } + + for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding_slot) + { + bindings[binding_slot].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT; + bindings[binding_slot].descriptorCount = 1; + bindings[binding_slot].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[binding_slot].binding = binding_slot; + bindings[binding_slot].pImmutableSamplers = nullptr; + } + + ensure(binding_slot == num_bindings); + m_descriptor_layout = vk::descriptors::create_layout(bindings); + + VkPipelineLayoutCreateInfo layout_info = {}; + layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + layout_info.setLayoutCount = 1; + layout_info.pSetLayouts = &m_descriptor_layout; + + std::vector push_constants = get_push_constants(); + if (!push_constants.empty()) + { + layout_info.pushConstantRangeCount = u32(push_constants.size()); + layout_info.pPushConstantRanges = push_constants.data(); + } + + CHECK_RESULT(vkCreatePipelineLayout(*m_device, &layout_info, nullptr, &m_pipeline_layout)); + } + std::vector overlay_pass::get_vertex_inputs() { check_heap(); - return {}; + return{}; } std::vector overlay_pass::get_fragment_inputs() { - using namespace vk::glsl; - - std::vector fs_inputs; + std::vector fs_inputs; u32 binding = 0; for (u32 n = 0; n < m_num_uniform_buffers; ++n, ++binding) { const std::string name = std::string("static_data") + (n > 0 ? 
std::to_string(n) : ""); - const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_uniform_buffer, 0, 0); - fs_inputs.push_back(input); + fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_uniform_buffer,{},{}, 0, name }); } for (u32 n = 0; n < m_num_usable_samplers; ++n, ++binding) { - const std::string name = "fs" + std::to_string(n); - const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, 0, binding); - fs_inputs.push_back(input); + fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, binding, "fs" + std::to_string(n) }); } for (u32 n = 0; n < m_num_input_attachments; ++n, ++binding) { - const std::string name = "sp" + std::to_string(n); - const auto input = program_input::make(::glsl::program_domain::glsl_fragment_program, name, program_input_type::input_type_texture, 0, binding); - fs_inputs.push_back(input); + fs_inputs.push_back({ ::glsl::program_domain::glsl_fragment_program, vk::glsl::program_input_type::input_type_texture,{},{}, binding, "sp" + std::to_string(n) }); } return fs_inputs; @@ -148,20 +208,20 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = VK_NULL_HANDLE; + info.layout = m_pipeline_layout; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = render_pass; auto compiler = vk::get_pipe_compiler(); - auto program = compiler->compile(info, vk::pipe_compiler::COMPILE_INLINE, {}, get_vertex_inputs(), get_fragment_inputs()); + auto program = compiler->compile(info, m_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, get_vertex_inputs(), get_fragment_inputs()); auto result = program.get(); m_program_cache[storage_key] = std::move(program); return result; } - 
vk::glsl::program* overlay_pass::load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src) + void overlay_pass::load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src) { vk::glsl::program *program = nullptr; const auto key = get_pipeline_key(pass); @@ -172,6 +232,8 @@ namespace vk else program = build_pipeline(key, pass); + m_descriptor_set = m_descriptor_pool.allocate(m_descriptor_layout); + if (!m_sampler && !src.empty()) { m_sampler = std::make_unique(*m_device, @@ -183,23 +245,21 @@ namespace vk if (m_num_uniform_buffers > 0) { - program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, 0); + program->bind_uniform({ m_ubo.heap->value, m_ubo_offset, std::max(m_ubo_length, 4u) }, 0, m_descriptor_set); } for (uint n = 0; n < src.size(); ++n) { VkDescriptorImageInfo info = { m_sampler->value, src[n]->value, src[n]->image()->current_layout }; - const auto [set, location] = program->get_uniform_location(::glsl::glsl_fragment_program, glsl::input_type_texture, "fs" + std::to_string(n)); - program->bind_uniform(info, set, location); + program->bind_uniform(info, "fs" + std::to_string(n), VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set); } - program->bind(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, program->pipeline); + m_descriptor_set.bind(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline_layout); VkBuffer buffers = m_vao.heap->value; VkDeviceSize offsets = m_vao_offset; vkCmdBindVertexBuffers(cmd, 0, 1, &buffers, &offsets); - - return program; } void overlay_pass::create(const vk::render_device& dev) @@ -207,6 +267,8 @@ namespace vk if (!initialized) { m_device = &dev; + init_descriptors(); + initialized = true; } } @@ -220,6 +282,10 @@ namespace vk m_program_cache.clear(); m_sampler.reset(); + vkDestroyDescriptorSetLayout(*m_device, m_descriptor_layout, nullptr); + vkDestroyPipelineLayout(*m_device, m_pipeline_layout, 
nullptr); + m_descriptor_pool.destroy(); + initialized = false; } } @@ -237,7 +303,7 @@ namespace vk return vk::get_framebuffer(dev, target->width(), target->height(), m_num_input_attachments > 0, render_pass, { target }); } - void overlay_pass::emit_geometry(vk::command_buffer& cmd, glsl::program* /*program*/) + void overlay_pass::emit_geometry(vk::command_buffer& cmd) { vkCmdDraw(cmd, num_drawable_elements, 1, first_vertex, 0); } @@ -262,11 +328,11 @@ namespace vk // This call clobbers dynamic state cmd.flags |= vk::command_buffer::cb_reload_dynamic_state; - auto program = load_program(cmd, render_pass, src); + load_program(cmd, render_pass, src); set_up_viewport(cmd, viewport.x1, viewport.y1, viewport.width(), viewport.height()); vk::begin_renderpass(cmd, render_pass, fbo->value, { positionu{0u, 0u}, sizeu{fbo->width(), fbo->height()} }); - emit_geometry(cmd, program); + emit_geometry(cmd); } void overlay_pass::run(vk::command_buffer& cmd, const areau& viewport, vk::image* target, const std::vector& src, VkRenderPass render_pass) @@ -310,7 +376,6 @@ namespace vk // 2 input textures m_num_usable_samplers = 2; - m_num_uniform_buffers = 0; renderpass_config.set_attachment_count(1); renderpass_config.set_color_mask(0, true, true, true, true); @@ -485,39 +550,24 @@ namespace vk false, true, desc->get_data(), owner_uid); } - std::vector ui_overlay_renderer::get_vertex_inputs() + std::vector ui_overlay_renderer::get_push_constants() { - auto result = overlay_pass::get_vertex_inputs(); - result.push_back( - glsl::program_input::make( - ::glsl::glsl_vertex_program, - "push_constants", - glsl::input_type_push_constant, - 0, - 0, - glsl::push_constant_ref { .size = 68 } - ) - ); - return result; + return + { + { + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + .size = 68 + }, + { + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .offset = 68, + .size = 12 + } + }; } - std::vector ui_overlay_renderer::get_fragment_inputs() - { - auto result = 
overlay_pass::get_fragment_inputs(); - result.push_back( - glsl::program_input::make( - ::glsl::glsl_fragment_program, - "push_constants", - glsl::input_type_push_constant, - 0, - 0, - glsl::push_constant_ref {.offset = 68, .size = 12 } - ) - ); - return result; - } - - void ui_overlay_renderer::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) + void ui_overlay_renderer::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) { // Byte Layout // 00: vec4 ui_scale; @@ -550,7 +600,7 @@ namespace vk .get(); push_buf[16] = std::bit_cast(vert_config); - vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_VERTEX_BIT, 0, 68, push_buf); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 68, push_buf); // 2. Fragment stuff rsx::overlays::fragment_options frag_opts; @@ -564,7 +614,7 @@ namespace vk push_buf[1] = m_time; push_buf[2] = m_blur_strength; - vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 68, 12, push_buf); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 68, 12, push_buf); } void ui_overlay_renderer::set_primitive_type(rsx::overlays::primitive_type type) @@ -591,7 +641,7 @@ namespace vk } } - void ui_overlay_renderer::emit_geometry(vk::command_buffer& cmd, glsl::program* program) + void ui_overlay_renderer::emit_geometry(vk::command_buffer& cmd) { if (m_current_primitive_type == rsx::overlays::primitive_type::quad_list) { @@ -607,7 +657,7 @@ namespace vk } else { - overlay_pass::emit_geometry(cmd, program); + overlay_pass::emit_geometry(cmd); } } @@ -709,30 +759,22 @@ namespace vk // Disable samplers m_num_usable_samplers = 0; - // Disable UBOs - m_num_uniform_buffers = 0; - renderpass_config.set_depth_mask(false); renderpass_config.set_color_mask(0, true, true, true, true); renderpass_config.set_attachment_count(1); } - std::vector attachment_clear_pass::get_vertex_inputs() + std::vector attachment_clear_pass::get_push_constants() { - 
check_heap(); - return - { - vk::glsl::program_input::make( - ::glsl::glsl_vertex_program, - "push_constants", - vk::glsl::input_type_push_constant, - 0, - 0, - glsl::push_constant_ref{ .size = 32 }) - }; + VkPushConstantRange constant; + constant.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + constant.offset = 0; + constant.size = 32; + + return { constant }; } - void attachment_clear_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) + void attachment_clear_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) { f32 data[8]; data[0] = clear_color.r; @@ -744,7 +786,7 @@ namespace vk data[6] = colormask.b; data[7] = colormask.a; - vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_VERTEX_BIT, 0, 32, data); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 32, data); } void attachment_clear_pass::set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h) @@ -808,9 +850,6 @@ namespace vk "{\n" " out_color = vec4(0.);\n" "}\n"; - - m_num_uniform_buffers = 0; - m_num_usable_samplers = 0; } void stencil_clear_pass::set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h) @@ -859,7 +898,7 @@ namespace vk std::pair repl_list[] = { - { "%sampler_binding", "x" }, + { "%sampler_binding", fmt::format("(%d + x)", sampler_location(0)) }, { "%set_decorator", "set=0" }, }; fs_src = fmt::replace_all(fs_src, repl_list); @@ -869,28 +908,21 @@ namespace vk renderpass_config.set_attachment_count(1); m_num_usable_samplers = 2; - m_num_uniform_buffers = 0; } - std::vector video_out_calibration_pass::get_fragment_inputs() + std::vector video_out_calibration_pass::get_push_constants() { - auto result = overlay_pass::get_fragment_inputs(); - result.push_back( - vk::glsl::program_input::make( - ::glsl::glsl_fragment_program, - "push_constants", - vk::glsl::input_type_push_constant, - 0, - 0, - glsl::push_constant_ref{ .size = 16 } - ) - ); - return result; + VkPushConstantRange constant; + 
constant.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + constant.offset = 0; + constant.size = 16; + + return { constant }; } - void video_out_calibration_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) + void video_out_calibration_pass::update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) { - vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, config.data); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, config.data); } void video_out_calibration_pass::run(vk::command_buffer& cmd, const areau& viewport, vk::framebuffer* target, diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h index a968f706a1..7308a5c894 100644 --- a/rpcs3/Emu/RSX/VK/VKOverlays.h +++ b/rpcs3/Emu/RSX/VK/VKOverlays.h @@ -44,6 +44,11 @@ namespace vk vk::glsl::shader m_vertex_shader; vk::glsl::shader m_fragment_shader; + vk::descriptor_pool m_descriptor_pool; + descriptor_set m_descriptor_set; + VkDescriptorSetLayout m_descriptor_layout = nullptr; + VkPipelineLayout m_pipeline_layout = nullptr; + VkFilter m_sampler_filter = VK_FILTER_LINEAR; u32 m_num_usable_samplers = 1; u32 m_num_input_attachments = 0; @@ -78,6 +83,8 @@ namespace vk void check_heap(); + void init_descriptors(); + virtual void update_uniforms(vk::command_buffer& /*cmd*/, vk::glsl::program* /*program*/) {} virtual std::vector get_vertex_inputs(); @@ -85,6 +92,11 @@ namespace vk virtual void get_dynamic_state_entries(std::vector& /*state_descriptors*/) {} + virtual std::vector get_push_constants() + { + return {}; + } + int sampler_location(int index) const { return 1 + index; } int input_attachment_location(int index) const { return 1 + m_num_usable_samplers + index; } @@ -101,7 +113,8 @@ namespace vk } vk::glsl::program* build_pipeline(u64 storage_key, VkRenderPass render_pass); - vk::glsl::program* load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src); + + void 
load_program(vk::command_buffer& cmd, VkRenderPass pass, const std::vector& src); virtual void create(const vk::render_device& dev); virtual void destroy(); @@ -110,7 +123,7 @@ namespace vk vk::framebuffer* get_framebuffer(vk::image* target, VkRenderPass render_pass); - virtual void emit_geometry(vk::command_buffer& cmd, glsl::program* program); + virtual void emit_geometry(vk::command_buffer& cmd); virtual void set_up_viewport(vk::command_buffer& cmd, u32 x, u32 y, u32 w, u32 h); @@ -156,14 +169,13 @@ namespace vk vk::image_view* find_font(rsx::overlays::font* font, vk::command_buffer& cmd, vk::data_heap& upload_heap); vk::image_view* find_temp_image(rsx::overlays::image_info_base* desc, vk::command_buffer& cmd, vk::data_heap& upload_heap, u32 owner_uid); - std::vector get_vertex_inputs() override; - std::vector get_fragment_inputs() override; + std::vector get_push_constants() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override; void set_primitive_type(rsx::overlays::primitive_type type); - void emit_geometry(vk::command_buffer& cmd, glsl::program* program) override; + void emit_geometry(vk::command_buffer& cmd) override; void run(vk::command_buffer& cmd, const areau& viewport, vk::framebuffer* target, VkRenderPass render_pass, vk::data_heap& upload_heap, rsx::overlays::overlay& ui); @@ -177,7 +189,7 @@ namespace vk attachment_clear_pass(); - std::vector get_vertex_inputs() override; + std::vector get_push_constants() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override; @@ -215,7 +227,7 @@ namespace vk video_out_calibration_pass(); - std::vector get_fragment_inputs() override; + std::vector get_push_constants() override; void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) override; diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp index 26e0b64098..52742e1241 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp 
+++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.cpp @@ -36,12 +36,12 @@ namespace vk { if (job.is_graphics_job) { - auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.inputs, {}, job.flags); + auto compiled = int_compile_graphics_pipe(job.graphics_data, job.graphics_modules, job.pipe_layout, job.inputs, {}); job.callback_func(compiled); } else { - auto compiled = int_compile_compute_pipe(job.compute_data, job.inputs, job.flags); + auto compiled = int_compile_compute_pipe(job.compute_data, job.pipe_layout); job.callback_func(compiled); } } @@ -50,33 +50,25 @@ namespace vk } } - std::unique_ptr pipe_compiler::int_compile_compute_pipe( - const VkComputePipelineCreateInfo& create_info, - const std::vector& cs_inputs, - op_flags flags) + std::unique_ptr pipe_compiler::int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout) { - auto program = std::make_unique(*m_device, create_info, cs_inputs); - program->link(flags & SEPARATE_SHADER_OBJECTS); - return program; + VkPipeline pipeline; + vkCreateComputePipelines(*g_render_device, nullptr, 1, &create_info, nullptr, &pipeline); + return std::make_unique(*m_device, pipeline, pipe_layout); } - std::unique_ptr pipe_compiler::int_compile_graphics_pipe( - const VkGraphicsPipelineCreateInfo& create_info, - const std::vector& vs_inputs, - const std::vector& fs_inputs, - op_flags flags) + std::unique_ptr pipe_compiler::int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout, + const std::vector& vs_inputs, const std::vector& fs_inputs) { - auto program = std::make_unique(*m_device, create_info, vs_inputs, fs_inputs); - program->link(flags & SEPARATE_SHADER_OBJECTS); - return program; + VkPipeline pipeline; + CHECK_RESULT(vkCreateGraphicsPipelines(*m_device, VK_NULL_HANDLE, 1, &create_info, nullptr, &pipeline)); + auto result = std::make_unique(*m_device, pipeline, pipe_layout, vs_inputs, fs_inputs); + 
result->link(); + return result; } - std::unique_ptr pipe_compiler::int_compile_graphics_pipe( - const vk::pipeline_props &create_info, - VkShaderModule modules[2], - const std::vector& vs_inputs, - const std::vector& fs_inputs, - op_flags flags) + std::unique_ptr pipe_compiler::int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout, + const std::vector& vs_inputs, const std::vector& fs_inputs) { VkPipelineShaderStageCreateInfo shader_stages[2] = {}; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -165,54 +157,52 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = VK_NULL_HANDLE; + info.layout = pipe_layout; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = vk::get_renderpass(*m_device, create_info.renderpass_key); - return int_compile_graphics_pipe(info, vs_inputs, fs_inputs, flags); + return int_compile_graphics_pipe(info, pipe_layout, vs_inputs, fs_inputs); } std::unique_ptr pipe_compiler::compile( const VkComputePipelineCreateInfo& create_info, - op_flags flags, callback_t callback, - const std::vector& cs_inputs) + VkPipelineLayout pipe_layout, + op_flags flags, callback_t callback) { - if (flags & COMPILE_INLINE) + if (flags == COMPILE_INLINE) { - return int_compile_compute_pipe(create_info, cs_inputs, flags); + return int_compile_compute_pipe(create_info, pipe_layout); } - m_work_queue.push(create_info, cs_inputs, flags, callback); + m_work_queue.push(create_info, pipe_layout, callback); return {}; } std::unique_ptr pipe_compiler::compile( const VkGraphicsPipelineCreateInfo& create_info, + VkPipelineLayout pipe_layout, op_flags flags, callback_t /*callback*/, - const std::vector& vs_inputs, - const std::vector& fs_inputs) + const std::vector& vs_inputs, const std::vector& fs_inputs) { // It is very inefficient to defer this as all pointers need to be 
saved - ensure(flags & COMPILE_INLINE); - return int_compile_graphics_pipe(create_info, vs_inputs, fs_inputs, flags); + ensure(flags == COMPILE_INLINE); + return int_compile_graphics_pipe(create_info, pipe_layout, vs_inputs, fs_inputs); } std::unique_ptr pipe_compiler::compile( - const vk::pipeline_props &create_info, - VkShaderModule vs, - VkShaderModule fs, + const vk::pipeline_props& create_info, + VkShaderModule module_handles[2], + VkPipelineLayout pipe_layout, op_flags flags, callback_t callback, - const std::vector& vs_inputs, - const std::vector& fs_inputs) + const std::vector& vs_inputs, const std::vector& fs_inputs) { - VkShaderModule modules[] = { vs, fs }; - if (flags & COMPILE_INLINE) + if (flags == COMPILE_INLINE) { - return int_compile_graphics_pipe(create_info, modules, vs_inputs, fs_inputs, flags); + return int_compile_graphics_pipe(create_info, module_handles, pipe_layout, vs_inputs, fs_inputs); } - m_work_queue.push(create_info, modules, vs_inputs, fs_inputs, flags, callback); + m_work_queue.push(create_info, pipe_layout, module_handles, vs_inputs, fs_inputs, callback); return {}; } diff --git a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h index 762e8aadfc..836bc5f14f 100644 --- a/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h +++ b/rpcs3/Emu/RSX/VK/VKPipelineCompiler.h @@ -53,16 +53,13 @@ namespace vk class pipe_compiler { public: - enum op_flag_bits + enum op_flags { COMPILE_DEFAULT = 0, COMPILE_INLINE = 1, - COMPILE_DEFERRED = 2, - SEPARATE_SHADER_OBJECTS = 4 + COMPILE_DEFERRED = 2 }; - using op_flags = rsx::flags32_t; - using callback_t = std::function&)>; pipe_compiler(); @@ -71,20 +68,21 @@ namespace vk void initialize(const vk::render_device* pdev); std::unique_ptr compile( - const VkComputePipelineCreateInfo& cs, - op_flags flags, callback_t callback = {}, - const std::vector& cs_inputs = {}); + const VkComputePipelineCreateInfo& create_info, + VkPipelineLayout pipe_layout, + op_flags flags, callback_t callback = 
{}); std::unique_ptr compile( const VkGraphicsPipelineCreateInfo& create_info, + VkPipelineLayout pipe_layout, op_flags flags, callback_t callback = {}, const std::vector& vs_inputs = {}, const std::vector& fs_inputs = {}); std::unique_ptr compile( const vk::pipeline_props &create_info, - VkShaderModule vs, - VkShaderModule fs, + VkShaderModule module_handles[2], + VkPipelineLayout pipe_layout, op_flags flags, callback_t callback = {}, const std::vector& vs_inputs = {}, const std::vector& fs_inputs = {}); @@ -114,25 +112,24 @@ namespace vk vk::pipeline_props graphics_data; compute_pipeline_props compute_data; + VkPipelineLayout pipe_layout; VkShaderModule graphics_modules[2]; std::vector inputs; - op_flags flags; - pipe_compiler_job( const vk::pipeline_props& props, + VkPipelineLayout layout, VkShaderModule modules[2], const std::vector& vs_in, const std::vector& fs_in, - op_flags flags_, callback_t func) { callback_func = func; graphics_data = props; + pipe_layout = layout; graphics_modules[0] = modules[0]; graphics_modules[1] = modules[1]; is_graphics_job = true; - flags = flags_; inputs.reserve(vs_in.size() + fs_in.size()); inputs.insert(inputs.end(), vs_in.begin(), vs_in.end()); @@ -141,42 +138,24 @@ namespace vk pipe_compiler_job( const VkComputePipelineCreateInfo& props, - const std::vector& cs_in, - op_flags flags_, + VkPipelineLayout layout, callback_t func) { callback_func = func; compute_data = props; + pipe_layout = layout; is_graphics_job = false; - flags = flags_; - - graphics_modules[0] = VK_NULL_HANDLE; - graphics_modules[1] = VK_NULL_HANDLE; - - inputs = cs_in; } }; const vk::render_device* m_device = nullptr; lf_queue m_work_queue; - std::unique_ptr int_compile_compute_pipe( - const VkComputePipelineCreateInfo& create_info, - const std::vector& cs_inputs, - op_flags flags); - - std::unique_ptr int_compile_graphics_pipe( - const VkGraphicsPipelineCreateInfo& create_info, - const std::vector& vs_inputs, - const std::vector& fs_inputs, - op_flags 
flags); - - std::unique_ptr int_compile_graphics_pipe( - const vk::pipeline_props &create_info, - VkShaderModule modules[2], - const std::vector& vs_inputs, - const std::vector& fs_inputs, - op_flags flags); + std::unique_ptr int_compile_compute_pipe(const VkComputePipelineCreateInfo& create_info, VkPipelineLayout pipe_layout); + std::unique_ptr int_compile_graphics_pipe(const VkGraphicsPipelineCreateInfo& create_info, VkPipelineLayout pipe_layout, + const std::vector& vs_inputs, const std::vector& fs_inputs); + std::unique_ptr int_compile_graphics_pipe(const vk::pipeline_props &create_info, VkShaderModule modules[2], VkPipelineLayout pipe_layout, + const std::vector& vs_inputs, const std::vector& fs_inputs); }; void initialize_pipe_compiler(int num_worker_threads = -1); diff --git a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h index 3e4ee000df..4f9f535a76 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramBuffer.h +++ b/rpcs3/Emu/RSX/VK/VKProgramBuffer.h @@ -46,16 +46,15 @@ namespace vk const fragment_program_type& fragmentProgramData, const vk::pipeline_props& pipelineProperties, bool compile_async, - std::function callback) + std::function callback, + VkPipelineLayout common_pipeline_layout) { - vk::pipe_compiler::op_flags compiler_flags = compile_async ? vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE; - compiler_flags |= vk::pipe_compiler::SEPARATE_SHADER_OBJECTS; + const auto compiler_flags = compile_async ? 
vk::pipe_compiler::COMPILE_DEFERRED : vk::pipe_compiler::COMPILE_INLINE; + VkShaderModule modules[2] = { vertexProgramData.handle, fragmentProgramData.handle }; auto compiler = vk::get_pipe_compiler(); auto result = compiler->compile( - pipelineProperties, - vertexProgramData.handle, - fragmentProgramData.handle, + pipelineProperties, modules, common_pipeline_layout, compiler_flags, callback, vertexProgramData.uniforms, fragmentProgramData.uniforms); diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp index aae8407103..57174caa98 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.cpp @@ -1,6 +1,5 @@ #include "stdafx.h" #include "VKProgramPipeline.h" -#include "VKResourceManager.h" #include "vkutils/descriptors.h" #include "vkutils/device.h" @@ -8,85 +7,10 @@ namespace vk { - extern const vk::render_device* get_current_renderer(); - namespace glsl { using namespace ::glsl; - bool operator == (const descriptor_slot_t& a, const VkDescriptorImageInfo& b) - { - const auto ptr = std::get_if(&a); - return !!ptr && - ptr->imageView == b.imageView && - ptr->sampler == b.sampler && - ptr->imageLayout == b.imageLayout; - } - - bool operator == (const descriptor_slot_t& a, const VkDescriptorBufferInfo& b) - { - const auto ptr = std::get_if(&a); - return !!ptr && - ptr->buffer == b.buffer && - ptr->offset == b.offset && - ptr->range == b.range; - } - - bool operator == (const descriptor_slot_t& a, const VkBufferView& b) - { - const auto ptr = std::get_if(&a); - return !!ptr && *ptr == b; - } - - VkDescriptorType to_descriptor_type(program_input_type type) - { - switch (type) - { - case input_type_uniform_buffer: - return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - case input_type_texel_buffer: - return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - case input_type_texture: - return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - case input_type_storage_buffer: - return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - case 
input_type_storage_texture: - return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - default: - fmt::throw_exception("Unexpected program input type %d", static_cast(type)); - } - } - - VkShaderStageFlags to_shader_stage_flags(::glsl::program_domain domain) - { - switch (domain) - { - case glsl_vertex_program: - return VK_SHADER_STAGE_VERTEX_BIT; - case glsl_fragment_program: - return VK_SHADER_STAGE_FRAGMENT_BIT; - case glsl_compute_program: - return VK_SHADER_STAGE_COMPUTE_BIT; - default: - fmt::throw_exception("Unexpected domain %d", static_cast(domain)); - } - } - - const char* to_string(::glsl::program_domain domain) - { - switch (domain) - { - case glsl_vertex_program: - return "vertex"; - case glsl_fragment_program: - return "fragment"; - case glsl_compute_program: - return "compute"; - default: - fmt::throw_exception("Unexpected domain %d", static_cast(domain)); - } - } - void shader::create(::glsl::program_domain domain, const std::string& source) { type = domain; @@ -99,8 +23,11 @@ namespace vk if (!spirv::compile_glsl_to_spv(m_compiled, m_source, type, ::glsl::glsl_rules_vulkan)) { + const std::string shader_type = type == ::glsl::program_domain::glsl_vertex_program ? "vertex" : + type == ::glsl::program_domain::glsl_fragment_program ? 
"fragment" : "compute"; + rsx_log.notice("%s", m_source); - fmt::throw_exception("Failed to compile %s shader", to_string(type)); + fmt::throw_exception("Failed to compile %s shader", shader_type); } VkShaderModuleCreateInfo vs_info; @@ -142,505 +69,165 @@ namespace vk return m_handle; } - void program::init() + void program::create_impl() { - m_linked = false; + linked = false; + attribute_location_mask = 0; + vertex_attributes_mask = 0; + + fs_texture_bindings.fill(~0u); + fs_texture_mirror_bindings.fill(~0u); + vs_texture_bindings.fill(~0u); } - program::program(VkDevice dev, const VkGraphicsPipelineCreateInfo& create_info, const std::vector &vertex_inputs, const std::vector& fragment_inputs) - : m_device(dev), m_info(create_info) + program::program(VkDevice dev, VkPipeline p, VkPipelineLayout layout, const std::vector &vertex_input, const std::vector& fragment_inputs) + : m_device(dev), pipeline(p), pipeline_layout(layout) { - init(); - - load_uniforms(vertex_inputs); + create_impl(); + load_uniforms(vertex_input); load_uniforms(fragment_inputs); } - program::program(VkDevice dev, const VkComputePipelineCreateInfo& create_info, const std::vector& compute_inputs) - : m_device(dev), m_info(create_info) + program::program(VkDevice dev, VkPipeline p, VkPipelineLayout layout) + : m_device(dev), pipeline(p), pipeline_layout(layout) { - init(); - - load_uniforms(compute_inputs); + create_impl(); } program::~program() { - vkDestroyPipeline(m_device, m_pipeline, nullptr); - - if (m_pipeline_layout) - { - vkDestroyPipelineLayout(m_device, m_pipeline_layout, nullptr); - - for (auto& set : m_sets) - { - set.destroy(); - } - } + vkDestroyPipeline(m_device, pipeline, nullptr); } program& program::load_uniforms(const std::vector& inputs) { - ensure(!m_linked); // "Cannot change uniforms in already linked program!" + ensure(!linked); // "Cannot change uniforms in already linked program!" 
for (auto &item : inputs) { - ensure(item.set < binding_set_index_max_enum); // Ensure we have a valid set id - ensure(item.location < 128u || item.type == input_type_push_constant); // Arbitrary limit but useful to catch possibly uninitialized values - m_sets[item.set].m_inputs[item.type].push_back(item); + uniforms[item.type].push_back(item); } return *this; } - program& program::link(bool separate_objects) + program& program::link() { - auto p_graphics_info = std::get_if(&m_info); - auto p_compute_info = !p_graphics_info ? std::get_if(&m_info) : nullptr; - const bool is_graphics_pipe = p_graphics_info != nullptr; - - if (!is_graphics_pipe) [[ likely ]] + // Preprocess texture bindings + // Link step is only useful for rasterizer programs, compute programs do not need this + for (const auto &uniform : uniforms[program_input_type::input_type_texture]) { - // We only support compute and graphics, so disable this for compute - separate_objects = false; - } - - if (!separate_objects) - { - // Collapse all sets into set 0 if validation passed - auto& sink = m_sets[0]; - for (auto& set : m_sets) + if (const auto name_start = uniform.name.find("tex"); name_start != umax) { - if (&set == &sink) - { - continue; - } + const auto name_end = uniform.name.find("_stencil"); + const auto index_start = name_start + 3; // Skip 'tex' part + const auto index_length = (name_end != umax) ? name_end - index_start : name_end; + const auto index_part = uniform.name.substr(index_start, index_length); + const auto index = std::stoi(index_part); - for (auto& type_arr : set.m_inputs) + if (name_start == 0) { - if (type_arr.empty()) + // Fragment texture (tex...) 
+ if (name_end == umax) { - continue; + // Normal texture + fs_texture_bindings[index] = uniform.location; } - - auto type = type_arr.front().type; - auto& dst = sink.m_inputs[type]; - dst.insert(dst.end(), type_arr.begin(), type_arr.end()); - - // Clear - type_arr.clear(); - } - } - - sink.validate(); - sink.init(m_device); - } - else - { - for (auto& set : m_sets) - { - for (auto& type_arr : set.m_inputs) - { - if (type_arr.empty()) + else { - continue; + // Stencil mirror + fs_texture_mirror_bindings[index] = uniform.location; } - - // Real set - set.validate(); - set.init(m_device); - break; + } + else + { + // Vertex texture (vtex...) + vs_texture_bindings[index] = uniform.location; } } } - create_pipeline_layout(); - ensure(m_pipeline_layout); - - if (is_graphics_pipe) - { - VkGraphicsPipelineCreateInfo create_info = *p_graphics_info; - create_info.layout = m_pipeline_layout; - CHECK_RESULT(vkCreateGraphicsPipelines(m_device, nullptr, 1, &create_info, nullptr, &m_pipeline)); - } - else - { - VkComputePipelineCreateInfo create_info = *p_compute_info; - create_info.layout = m_pipeline_layout; - CHECK_RESULT(vkCreateComputePipelines(m_device, nullptr, 1, &create_info, nullptr, &m_pipeline)); - } - - m_linked = true; + linked = true; return *this; } bool program::has_uniform(program_input_type type, const std::string& uniform_name) { - for (auto& set : m_sets) + const auto& uniform = uniforms[type]; + return std::any_of(uniform.cbegin(), uniform.cend(), [&uniform_name](const auto& u) { - const auto& uniform = set.m_inputs[type]; - return std::any_of(uniform.cbegin(), uniform.cend(), [&uniform_name](const auto& u) - { - return u.name == uniform_name; - }); - } - - return false; + return u.name == uniform_name; + }); } - std::pair program::get_uniform_location(::glsl::program_domain domain, program_input_type type, const std::string& uniform_name) + void program::bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string& uniform_name, 
VkDescriptorType type, vk::descriptor_set &set) { - for (unsigned i = 0; i < ::size32(m_sets); ++i) + for (const auto &uniform : uniforms[program_input_type::input_type_texture]) { - const auto& type_arr = m_sets[i].m_inputs[type]; - const auto result = std::find_if(type_arr.cbegin(), type_arr.cend(), [&](const auto& u) + if (uniform.name == uniform_name) { - return u.domain == domain && u.name == uniform_name; - }); - - if (result != type_arr.end()) - { - return { i, result->location }; + set.push(image_descriptor, type, uniform.location); + attribute_location_mask |= (1ull << uniform.location); + return; } } - return { umax, umax }; + rsx_log.notice("texture not found in program: %s", uniform_name.c_str()); } - void program::bind_uniform(const VkDescriptorImageInfo& image_descriptor, u32 set_id, u32 binding_point) + void program::bind_uniform(const VkDescriptorImageInfo & image_descriptor, int texture_unit, ::glsl::program_domain domain, vk::descriptor_set &set, bool is_stencil_mirror) { - if (m_sets[set_id].m_descriptor_slots[binding_point] == image_descriptor) + ensure(domain != ::glsl::program_domain::glsl_compute_program); + + u32 binding; + if (domain == ::glsl::program_domain::glsl_fragment_program) { + binding = (is_stencil_mirror) ? fs_texture_mirror_bindings[texture_unit] : fs_texture_bindings[texture_unit]; + } + else + { + binding = vs_texture_bindings[texture_unit]; + } + + if (binding != ~0u) + { + set.push(image_descriptor, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, binding); + attribute_location_mask |= (1ull << binding); return; } - m_sets[set_id].notify_descriptor_slot_updated(binding_point, image_descriptor); + rsx_log.notice("texture not found in program: %stex%u", (domain == ::glsl::program_domain::glsl_vertex_program)? 
"v" : "", texture_unit); } - void program::bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 set_id, u32 binding_point) + void program::bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, vk::descriptor_set &set) { - if (m_sets[set_id].m_descriptor_slots[binding_point] == buffer_descriptor) - { - return; - } - - m_sets[set_id].notify_descriptor_slot_updated(binding_point, buffer_descriptor); + bind_buffer(buffer_descriptor, binding_point, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, set); } - void program::bind_uniform(const VkBufferView &buffer_view, u32 set_id, u32 binding_point) + void program::bind_uniform(const VkBufferView &buffer_view, u32 binding_point, vk::descriptor_set &set) { - if (m_sets[set_id].m_descriptor_slots[binding_point] == buffer_view) - { - return; - } - - m_sets[set_id].notify_descriptor_slot_updated(binding_point, buffer_view); + set.push(buffer_view, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, binding_point); + attribute_location_mask |= (1ull << binding_point); } - void program::bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, int count, u32 set_id, u32 binding_point) + void program::bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, vk::descriptor_set &set) { - // Non-caching write - auto& set = m_sets[set_id]; - auto& arr = set.m_scratch_images_array; - - descriptor_array_ref_t data + for (const auto &uniform : uniforms[type]) { - .first = arr.size(), - .count = static_cast(count) - }; - - arr.reserve(arr.size() + static_cast(count)); - for (int i = 0; i < count; ++i) - { - arr.push_back(image_descriptors[i]); - } - - set.notify_descriptor_slot_updated(binding_point, data); - } - - void program::create_pipeline_layout() - { - ensure(!m_linked); - ensure(m_pipeline_layout == VK_NULL_HANDLE); - - rsx::simple_array push_constants{}; - rsx::simple_array set_layouts{}; - - for (auto& set : m_sets) - { - if (!set.m_device) + if (uniform.name 
== binding_name) { - continue; - } - - set.create_descriptor_set_layout(); - set_layouts.push_back(set.m_descriptor_set_layout); - - for (const auto& input : set.m_inputs[input_type_push_constant]) - { - const auto& range = input.as_push_constant(); - push_constants.push_back({ - .stageFlags = to_shader_stage_flags(input.domain), - .offset = range.offset, - .size = range.size - }); - } - } - - VkPipelineLayoutCreateInfo create_info - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .flags = 0, - .setLayoutCount = set_layouts.size(), - .pSetLayouts = set_layouts.data(), - .pushConstantRangeCount = push_constants.size(), - .pPushConstantRanges = push_constants.data() - }; - CHECK_RESULT(vkCreatePipelineLayout(m_device, &create_info, nullptr, &m_pipeline_layout)); - } - - program& program::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point) - { - VkDescriptorSet bind_sets[binding_set_index_max_enum]; - unsigned count = 0; - - for (auto& set : m_sets) - { - if (!set.m_device) - { - continue; - } - - bind_sets[count++] = set.commit(); // Commit variable changes and return handle to the new set - } - - vkCmdBindPipeline(cmd, bind_point, m_pipeline); - vkCmdBindDescriptorSets(cmd, bind_point, m_pipeline_layout, 0, count, bind_sets, 0, nullptr); - return *this; - } - - void descriptor_table_t::destroy() - { - if (!m_device) - { - return; - } - - if (m_descriptor_set_layout) - { - vkDestroyDescriptorSetLayout(m_device, m_descriptor_set_layout, nullptr); - } - - if (m_descriptor_pool) - { - m_descriptor_pool->destroy(); - m_descriptor_pool.reset(); - } - - m_device = VK_NULL_HANDLE; - } - - void descriptor_table_t::init(VkDevice dev) - { - m_device = dev; - - size_t bind_slots_count = 0; - for (auto& type_arr : m_inputs) - { - if (type_arr.empty() || type_arr.front().type == input_type_push_constant) - { - continue; - } - - bind_slots_count += type_arr.size(); - } - - m_descriptor_slots.resize(bind_slots_count); - 
std::memset(m_descriptor_slots.data(), 0, sizeof(descriptor_slot_t) * bind_slots_count); - - m_descriptors_dirty.resize(bind_slots_count); - std::fill(m_descriptors_dirty.begin(), m_descriptors_dirty.end(), false); - } - - VkDescriptorSet descriptor_table_t::allocate_descriptor_set() - { - if (!m_descriptor_pool) - { - create_descriptor_pool(); - } - - return m_descriptor_pool->allocate(m_descriptor_set_layout); - } - - VkDescriptorSet descriptor_table_t::commit() - { - if (!m_descriptor_set) - { - m_any_descriptors_dirty = true; - std::fill(m_descriptors_dirty.begin(), m_descriptors_dirty.end(), false); - } - - // Check if we need to actually open a new set - if (!m_any_descriptors_dirty) - { - return m_descriptor_set.value(); - } - - auto push_descriptor_slot = [this](unsigned idx) - { - const auto& slot = m_descriptor_slots[idx]; - const VkDescriptorType type = m_descriptor_types[idx]; - if (auto ptr = std::get_if(&slot)) - { - m_descriptor_set.push(*ptr, type, idx); + bind_uniform(buffer_view, uniform.location, set); return; } - - if (auto ptr = std::get_if(&slot)) - { - m_descriptor_set.push(*ptr, type, idx); - return; - } - - if (auto ptr = std::get_if(&slot)) - { - m_descriptor_set.push(*ptr, type, idx); - return; - } - - if (auto ptr = std::get_if(&slot)) - { - ensure(type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); // Only type supported at the moment - ensure((ptr->first + ptr->count) <= m_scratch_images_array.size()); - m_descriptor_set.push(m_scratch_images_array.data() + ptr->first, ptr->count, type, idx); - return; - } - - fmt::throw_exception("Unexpected descriptor structure at index %u", idx); - }; - - m_descriptor_set = allocate_descriptor_set(); - - for (unsigned i = 0; i < m_descriptor_slots.size(); ++i) - { - if (m_descriptors_dirty[i]) - { - // Push - push_descriptor_slot(i); - m_descriptors_dirty[i] = false; - continue; - } - - // We should copy here if possible. 
- // Without descriptor_buffer, the most efficient option is to just use the normal bind logic due to the pointer-based nature of the descriptor inputs and no stride. - push_descriptor_slot(i); } - m_descriptor_set.on_bind(); - m_any_descriptors_dirty = false; - m_scratch_images_array.clear(); - - return m_descriptor_set.value(); + rsx_log.notice("vertex buffer not found in program: %s", binding_name.c_str()); } - void descriptor_table_t::create_descriptor_set_layout() + void program::bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type, vk::descriptor_set &set) { - ensure(m_descriptor_set_layout == VK_NULL_HANDLE); - - rsx::simple_array bindings; - bindings.reserve(16); - - m_descriptor_pool_sizes.clear(); - m_descriptor_pool_sizes.reserve(input_type_max_enum); - - std::unordered_map descriptor_type_map; - - auto descriptor_count = [](const std::string& name) -> u32 - { - const auto start = name.find_last_of("["); - if (start == std::string::npos) - { - return 1; - } - - const auto end = name.find_last_of("]"); - ensure(end != std::string::npos && start < end, "Invalid variable name"); - - const std::string array_size = name.substr(start + 1, end - start - 1); - if (const auto count = std::atoi(array_size.c_str()); - count > 0) - { - return count; - } - - return 1; - }; - - for (const auto& type_arr : m_inputs) - { - if (type_arr.empty() || type_arr.front().type == input_type_push_constant) - { - continue; - } - - VkDescriptorType type = to_descriptor_type(type_arr.front().type); - m_descriptor_pool_sizes.push_back({ .type = type }); - - for (const auto& input : type_arr) - { - VkDescriptorSetLayoutBinding binding - { - .binding = input.location, - .descriptorType = type, - .descriptorCount = descriptor_count(input.name), - .stageFlags = to_shader_stage_flags(input.domain) - }; - bindings.push_back(binding); - - descriptor_type_map[input.location] = type; - m_descriptor_pool_sizes.back().descriptorCount += 
binding.descriptorCount; - } - } - - m_descriptor_types.resize(::size32(m_descriptors_dirty)); - - for (u32 i = 0; i < ::size32(m_descriptors_dirty); ++i) - { - if (descriptor_type_map.find(i) == descriptor_type_map.end()) - { - fmt::throw_exception("Invalid input structure. Some input bindings were not declared!"); - } - m_descriptor_types[i] = descriptor_type_map[i]; - } - - m_descriptor_set_layout = vk::descriptors::create_layout(bindings); - } - - void descriptor_table_t::create_descriptor_pool() - { - m_descriptor_pool = std::make_unique(); - m_descriptor_pool->create(*vk::get_current_renderer(), m_descriptor_pool_sizes); - } - - void descriptor_table_t::validate() const - { - // Check for overlapping locations - std::set taken_locations; - - for (auto& type_arr : m_inputs) - { - if (type_arr.empty() || - type_arr.front().type == input_type_push_constant) - { - continue; - } - - for (const auto& input : type_arr) - { - ensure(taken_locations.find(input.location) == taken_locations.end(), "Overlapping input locations found."); - taken_locations.insert(input.location); - } - } + set.push(buffer_descriptor, type, binding_point); + attribute_location_mask |= (1ull << binding_point); } } } diff --git a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h index c7faaa8c37..06dbaf877f 100644 --- a/rpcs3/Emu/RSX/VK/VKProgramPipeline.h +++ b/rpcs3/Emu/RSX/VK/VKProgramPipeline.h @@ -7,7 +7,6 @@ #include #include -#include namespace vk { @@ -16,20 +15,18 @@ namespace vk enum program_input_type : u32 { input_type_uniform_buffer = 0, - input_type_texel_buffer, - input_type_texture, - input_type_storage_buffer, - input_type_storage_texture, - input_type_push_constant, + input_type_texel_buffer = 1, + input_type_texture = 2, + input_type_storage_buffer = 3, - input_type_max_enum + input_type_max_enum = 4 }; struct bound_sampler { - VkFormat format = VK_FORMAT_UNDEFINED; - VkImage image = VK_NULL_HANDLE; - VkComponentMapping mapping{}; + VkFormat 
format; + VkImage image; + VkComponentMapping mapping; }; struct bound_buffer @@ -40,50 +37,16 @@ namespace vk u64 size = 0; }; - struct push_constant_ref - { - u32 offset = 0; - u32 size = 0; - }; - struct program_input { ::glsl::program_domain domain; program_input_type type; - using bound_data_t = std::variant; - bound_data_t bound_data; + bound_buffer as_buffer; + bound_sampler as_sampler; - u32 set = 0; - u32 location = umax; + u32 location; std::string name; - - inline bound_buffer& as_buffer() { return *std::get_if(&bound_data); } - inline bound_sampler& as_sampler() { return *std::get_if(&bound_data); } - inline push_constant_ref& as_push_constant() { return *std::get_if(&bound_data); } - - inline const bound_buffer& as_buffer() const { return *std::get_if(&bound_data); } - inline const bound_sampler& as_sampler() const { return *std::get_if(&bound_data); } - inline const push_constant_ref& as_push_constant() const { return *std::get_if(&bound_data); } - - static program_input make( - ::glsl::program_domain domain, - const std::string& name, - program_input_type type, - u32 set, - u32 location, - const bound_data_t& data = bound_buffer{}) - { - return program_input - { - .domain = domain, - .type = type, - .bound_data = data, - .set = set, - .location = location, - .name = name - }; - } }; class shader @@ -109,103 +72,40 @@ namespace vk VkShaderModule get_handle() const; }; - struct descriptor_array_ref_t - { - u32 first = 0; - u32 count = 0; - }; - - using descriptor_slot_t = std::variant; - - struct descriptor_table_t - { - VkDevice m_device = VK_NULL_HANDLE; - std::array, input_type_max_enum> m_inputs; - - std::unique_ptr m_descriptor_pool; - VkDescriptorSetLayout m_descriptor_set_layout = VK_NULL_HANDLE; - vk::descriptor_set m_descriptor_set{}; - rsx::simple_array m_descriptor_pool_sizes; - rsx::simple_array m_descriptor_types; - - std::vector m_descriptor_slots; - std::vector m_descriptors_dirty; - bool m_any_descriptors_dirty = false; - - 
rsx::simple_array< VkDescriptorImageInfo> m_scratch_images_array; - - void init(VkDevice dev); - void destroy(); - - void validate() const; - - void create_descriptor_set_layout(); - void create_descriptor_pool(); - - VkDescriptorSet allocate_descriptor_set(); - VkDescriptorSet commit(); - - template - inline void notify_descriptor_slot_updated(u32 slot, const T& data) - { - m_descriptors_dirty[slot] = true; - m_descriptor_slots[slot] = data; - m_any_descriptors_dirty = true; - } - }; - - enum binding_set_index : u32 - { - // For separate shader objects - binding_set_index_vertex = 0, - binding_set_index_fragment = 1, - - // Aliases - binding_set_index_compute = 0, - binding_set_index_unified = 0, - - // Meta - binding_set_index_max_enum = 2, - }; - class program { - VkDevice m_device = VK_NULL_HANDLE; - VkPipeline m_pipeline = VK_NULL_HANDLE; - VkPipelineLayout m_pipeline_layout = VK_NULL_HANDLE; + std::array, input_type_max_enum> uniforms; + VkDevice m_device; - std::variant m_info; - std::array m_sets; - bool m_linked = false; + std::array fs_texture_bindings; + std::array fs_texture_mirror_bindings; + std::array vs_texture_bindings; + bool linked; - void init(); - void create_pipeline_layout(); - - program& load_uniforms(const std::vector& inputs); + void create_impl(); public: + VkPipeline pipeline; + VkPipelineLayout pipeline_layout; + u64 attribute_location_mask; + u64 vertex_attributes_mask; - program(VkDevice dev, const VkGraphicsPipelineCreateInfo& create_info, const std::vector &vertex_inputs, const std::vector& fragment_inputs); - program(VkDevice dev, const VkComputePipelineCreateInfo& create_info, const std::vector& compute_inputs); + program(VkDevice dev, VkPipeline p, VkPipelineLayout layout, const std::vector &vertex_input, const std::vector& fragment_inputs); + program(VkDevice dev, VkPipeline p, VkPipelineLayout layout); program(const program&) = delete; program(program&& other) = delete; ~program(); - program& link(bool separate_stages); - 
program& bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point); + program& load_uniforms(const std::vector& inputs); + program& link(); bool has_uniform(program_input_type type, const std::string &uniform_name); - std::pair get_uniform_location(::glsl::program_domain domain, program_input_type type, const std::string& uniform_name); - - void bind_uniform(const VkDescriptorImageInfo &image_descriptor, u32 set_id, u32 binding_point); - void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 set_id, u32 binding_point); - void bind_uniform(const VkBufferView &buffer_view, u32 set_id, u32 binding_point); - void bind_uniform(const VkBufferView &buffer_view, ::glsl::program_domain domain, program_input_type type, const std::string &binding_name); - - void bind_uniform_array(const VkDescriptorImageInfo* image_descriptors, int count, u32 set_id, u32 binding_point); - - inline VkPipelineLayout layout() const { return m_pipeline_layout; } - inline VkPipeline value() const { return m_pipeline; } + void bind_uniform(const VkDescriptorImageInfo &image_descriptor, const std::string &uniform_name, VkDescriptorType type, vk::descriptor_set &set); + void bind_uniform(const VkDescriptorImageInfo &image_descriptor, int texture_unit, ::glsl::program_domain domain, vk::descriptor_set &set, bool is_stencil_mirror = false); + void bind_uniform(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, vk::descriptor_set &set); + void bind_uniform(const VkBufferView &buffer_view, u32 binding_point, vk::descriptor_set &set); + void bind_uniform(const VkBufferView &buffer_view, program_input_type type, const std::string &binding_name, vk::descriptor_set &set); + void bind_buffer(const VkDescriptorBufferInfo &buffer_descriptor, u32 binding_point, VkDescriptorType type, vk::descriptor_set &set); }; } } diff --git a/rpcs3/Emu/RSX/VK/VKRenderTargets.h b/rpcs3/Emu/RSX/VK/VKRenderTargets.h index 3c3ef0acbd..caa85dcc84 100644 --- 
a/rpcs3/Emu/RSX/VK/VKRenderTargets.h +++ b/rpcs3/Emu/RSX/VK/VKRenderTargets.h @@ -154,7 +154,7 @@ namespace vk // If we have driver support for FBO loops, set the usage flag for it. if (vk::get_current_renderer()->get_framebuffer_loops_support()) { - return { VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT }; + return { VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT, 0 }; } // Workarounds to force transition to GENERAL to decompress. diff --git a/rpcs3/Emu/RSX/VK/VKResolveHelper.h b/rpcs3/Emu/RSX/VK/VKResolveHelper.h index 23d243b032..7cf6631b67 100644 --- a/rpcs3/Emu/RSX/VK/VKResolveHelper.h +++ b/rpcs3/Emu/RSX/VK/VKResolveHelper.h @@ -16,47 +16,50 @@ namespace vk u32 cs_wave_y = 1; cs_resolve_base() - { - ssbo_count = 0; - } + {} virtual ~cs_resolve_base() {} void build(const std::string& format_prefix, bool unresolve, bool bgra_swap); - std::vector get_inputs() override + std::vector> get_descriptor_layout() override + { + return + { + { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2 } + }; + } + + void declare_inputs() override { std::vector inputs = { - glsl::program_input::make( + { ::glsl::program_domain::glsl_compute_program, - "multisampled", - glsl::input_type_storage_texture, + vk::glsl::program_input_type::input_type_texture, + {}, {}, 0, - 0 - ), - - glsl::program_input::make( + "multisampled" + }, + { ::glsl::program_domain::glsl_compute_program, - "resolve", - glsl::input_type_storage_texture, - 0, - 1 - ), + vk::glsl::program_input_type::input_type_texture, + {}, {}, + 1, + "resolve" + } }; - auto result = compute_task::get_inputs(); - result.insert(result.end(), inputs.begin(), inputs.end()); - return result; + m_program->load_uniforms(inputs); } - void bind_resources(const vk::command_buffer& /*cmd*/) override + void bind_resources() override { auto msaa_view = multisampled->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_VIEW_MULTISAMPLED)); auto resolved_view = 
resolve->get_view(rsx::default_remap_vector.with_encoding(VK_REMAP_IDENTITY)); - m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, 0, 0); - m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, 0, 1); + m_program->bind_uniform({ VK_NULL_HANDLE, msaa_view->value, multisampled->current_layout }, "multisampled", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); + m_program->bind_uniform({ VK_NULL_HANDLE, resolved_view->value, resolve->current_layout }, "resolve", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); } void run(const vk::command_buffer& cmd, vk::viewable_image* msaa_image, vk::viewable_image* resolve_image) @@ -113,23 +116,19 @@ namespace vk void build(bool resolve_depth, bool resolve_stencil, bool unresolve); - std::vector get_fragment_inputs() override + std::vector get_push_constants() override { - auto result = overlay_pass::get_fragment_inputs(); - result.push_back(glsl::program_input::make( - ::glsl::glsl_fragment_program, - "push_constants", - glsl::input_type_push_constant, - 0, - umax, - glsl::push_constant_ref{ .size = 16 } - )); - return result; + VkPushConstantRange constant; + constant.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + constant.offset = 0; + constant.size = 16; + + return { constant }; } - void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* program) override + void update_uniforms(vk::command_buffer& cmd, vk::glsl::program* /*program*/) override { - vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 0, static_parameters_width * 4, static_parameters); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, static_parameters_width * 4, static_parameters); } void update_sample_configuration(vk::image* msaa_image) @@ -227,16 +226,16 @@ namespace vk state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); } - void emit_geometry(vk::command_buffer& cmd, glsl::program* program) override + 
void emit_geometry(vk::command_buffer& cmd) override { vkCmdClearAttachments(cmd, 1, &clear_info, 1, ®ion); for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1) { vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, write_mask); - vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); - overlay_pass::emit_geometry(cmd, program); + overlay_pass::emit_geometry(cmd); } } @@ -286,16 +285,16 @@ namespace vk state_descriptors.push_back(VK_DYNAMIC_STATE_STENCIL_WRITE_MASK); } - void emit_geometry(vk::command_buffer& cmd, glsl::program* program) override + void emit_geometry(vk::command_buffer& cmd) override { vkCmdClearAttachments(cmd, 1, &clear_info, 1, &clear_region); for (s32 write_mask = 0x1; write_mask <= 0x80; write_mask <<= 1) { vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, write_mask); - vkCmdPushConstants(cmd, program->layout(), VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 8, 4, &write_mask); - overlay_pass::emit_geometry(cmd, program); + overlay_pass::emit_geometry(cmd); } } diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp index c80fb7873d..3c9188fd60 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.cpp @@ -12,70 +12,23 @@ namespace vk { - u32 shader_interpreter::init(VKVertexProgram* vk_prog, u64 compiler_options) const - { - std::memset(&vk_prog->binding_table, 0xff, sizeof(vk_prog->binding_table)); - - u32 location = 0; - vk_prog->binding_table.vertex_buffers_location = location; - location += 3; - - vk_prog->binding_table.context_buffer_location = location++; - - if (vk::emulate_conditional_rendering()) - { - vk_prog->binding_table.cr_pred_buffer_location = location++; - } - - if (compiler_options & 
program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) - { - vk_prog->binding_table.instanced_lut_buffer_location = location++; - vk_prog->binding_table.instanced_cbuf_location = location++; - } - else - { - vk_prog->binding_table.cbuf_location = location++; - } - - if (vk::emulate_conditional_rendering()) - { - vk_prog->binding_table.cr_pred_buffer_location = location++; - } - - // Return next index - return location; - } - - u32 shader_interpreter::init(VKFragmentProgram* vk_prog, u64 /*compiler_opt*/) const - { - std::memset(&vk_prog->binding_table, 0xff, sizeof(vk_prog->binding_table)); - - vk_prog->binding_table.context_buffer_location = 0; - vk_prog->binding_table.tex_param_location = 1; - vk_prog->binding_table.polygon_stipple_params_location = 2; - - // Return next index - return 3; - } - - VKVertexProgram* shader_interpreter::build_vs(u64 compiler_options) + glsl::shader* shader_interpreter::build_vs(u64 compiler_options) { ::glsl::shader_properties properties{}; properties.domain = ::glsl::program_domain::glsl_vertex_program; properties.require_lit_emulation = true; + // TODO: Extend decompiler thread + // TODO: Rename decompiler thread, it no longer spawns a thread RSXVertexProgram null_prog; std::string shader_str; ParamArray arr; - - // Initialize binding layout - auto vk_prog = std::make_unique(); - m_vertex_instruction_start = init(vk_prog.get(), compiler_options); + VKVertexProgram vk_prog; null_prog.ctrl = (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) ? 
RSX_SHADER_CONTROL_INSTANCED_CONSTANTS : 0; - VKVertexDecompilerThread comp(null_prog, shader_str, arr, *vk_prog); + VKVertexDecompilerThread comp(null_prog, shader_str, arr, vk_prog); // Initialize compiler properties comp.properties.has_indexed_constants = true; @@ -99,12 +52,6 @@ namespace vk " uvec4 vp_instructions[];\n" "};\n\n"; - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_VTX_TEXTURES) - { - // FIXME: Unimplemented - rsx_log.todo("Vertex textures are currently not implemented for the shader interpreter."); - } - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING) { builder << "#define _ENABLE_INSTANCED_CONSTANTS\n"; @@ -121,29 +68,48 @@ namespace vk builder << program_common::interpreter::get_vertex_interpreter(); const std::string s = builder.str(); - auto vs = &vk_prog->shader; + auto vs = std::make_unique(); vs->create(::glsl::program_domain::glsl_vertex_program, s); vs->compile(); - // Declare local inputs - auto vs_inputs = comp.get_inputs(); - + // Prepare input table + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); vk::glsl::program_input in; - in.set = 0; + + in.location = binding_table.vertex_params_bind_slot; in.domain = ::glsl::glsl_vertex_program; - in.location = m_vertex_instruction_start; - in.type = glsl::input_type_storage_buffer; - in.name = "VertexInstructionBlock"; - vs_inputs.push_back(in); + in.name = "VertexContextBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + m_vs_inputs.push_back(in); - vk_prog->SetInputs(vs_inputs); + in.location = binding_table.vertex_buffers_first_bind_slot; + in.name = "persistent_input_stream"; + in.type = vk::glsl::input_type_texel_buffer; + m_vs_inputs.push_back(in); - auto ret = vk_prog.get(); - m_shader_cache[compiler_options].m_vs = std::move(vk_prog); + in.location = binding_table.vertex_buffers_first_bind_slot + 1; + in.name = "volatile_input_stream"; + in.type = 
vk::glsl::input_type_texel_buffer; + m_vs_inputs.push_back(in); + + in.location = binding_table.vertex_buffers_first_bind_slot + 2; + in.name = "vertex_layout_stream"; + in.type = vk::glsl::input_type_texel_buffer; + m_vs_inputs.push_back(in); + + in.location = binding_table.vertex_constant_buffers_bind_slot; + in.name = "VertexConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + m_vs_inputs.push_back(in); + + // TODO: Bind textures if needed + + auto ret = vs.get(); + m_shader_cache[compiler_options].m_vs = std::move(vs); return ret; } - VKFragmentProgram* shader_interpreter::build_fs(u64 compiler_options) + glsl::shader* shader_interpreter::build_fs(u64 compiler_options) { [[maybe_unused]] ::glsl::shader_properties properties{}; properties.domain = ::glsl::program_domain::glsl_fragment_program; @@ -154,13 +120,10 @@ namespace vk ParamArray arr; std::string shader_str; RSXFragmentProgram frag; + VKFragmentProgram vk_prog; + VKFragmentDecompilerThread comp(shader_str, arr, frag, len, vk_prog); - auto vk_prog = std::make_unique(); - m_fragment_instruction_start = init(vk_prog.get(), compiler_options); - m_fragment_textures_start = m_fragment_instruction_start + 1; - - VKFragmentDecompilerThread comp(shader_str, arr, frag, len, *vk_prog); - + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); std::stringstream builder; builder << "#version 450\n" @@ -236,7 +199,7 @@ namespace vk for (int i = 0, bind_location = m_fragment_textures_start; i < 4; ++i) { - builder << "layout(set=1, binding=" << bind_location++ << ") " << "uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n"; + builder << "layout(set=0, binding=" << bind_location++ << ") " << "uniform " << type_names[i] << " " << type_names[i] << "_array[16];\n"; } builder << "\n" @@ -248,7 +211,7 @@ namespace vk } builder << - "layout(std430, set=1, binding=" << m_fragment_instruction_start << ") readonly restrict buffer FragmentInstructionBlock\n" + 
"layout(std430, binding=" << m_fragment_instruction_start << ") readonly restrict buffer FragmentInstructionBlock\n" "{\n" " uint shader_control;\n" " uint texture_control;\n" @@ -260,55 +223,182 @@ namespace vk builder << program_common::interpreter::get_fragment_interpreter(); const std::string s = builder.str(); - auto fs = &vk_prog->shader; + auto fs = std::make_unique(); fs->create(::glsl::program_domain::glsl_fragment_program, s); fs->compile(); - // Declare local inputs - auto inputs = comp.get_inputs(); - + // Prepare input table vk::glsl::program_input in; - in.set = 1; + in.location = binding_table.fragment_constant_buffers_bind_slot; in.domain = ::glsl::glsl_fragment_program; - in.location = m_fragment_instruction_start; - in.type = glsl::input_type_storage_buffer; - in.name = "FragmentInstructionBlock"; - inputs.push_back(in); + in.name = "FragmentConstantsBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + m_fs_inputs.push_back(in); - if (compiler_options & program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES) + in.location = binding_table.fragment_state_bind_slot; + in.name = "FragmentStateBuffer"; + m_fs_inputs.push_back(in); + + in.location = binding_table.fragment_texture_params_bind_slot; + in.name = "TextureParametersBuffer"; + m_fs_inputs.push_back(in); + + for (int i = 0, location = m_fragment_textures_start; i < 4; ++i, ++location) { - for (int i = 0, location = m_fragment_textures_start; i < 4; ++i, ++location) - { - in.location = location; - in.name = std::string(type_names[i]) + "_array[16]"; - in.type = glsl::input_type_texture; - inputs.push_back(in); - } + in.location = location; + in.name = std::string(type_names[i]) + "_array[16]"; + m_fs_inputs.push_back(in); } - vk_prog->SetInputs(inputs); - - auto ret = vk_prog.get(); - m_shader_cache[compiler_options].m_fs = std::move(vk_prog); + auto ret = fs.get(); + m_shader_cache[compiler_options].m_fs = std::move(fs); return ret; } + std::pair 
shader_interpreter::create_layout(VkDevice dev) + { + const auto& binding_table = vk::get_current_renderer()->get_pipeline_binding_table(); + auto bindings = get_common_binding_table(); + u32 idx = ::size32(bindings); + + bindings.resize(binding_table.total_descriptor_bindings); + + // Texture 1D array + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 16; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot; + bindings[idx].pImmutableSamplers = nullptr; + + m_fragment_textures_start = bindings[idx].binding; + idx++; + + // Texture 2D array + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 16; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 1; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + // Texture 3D array + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 16; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 2; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + // Texture CUBE array + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 16; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 3; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + // Vertex texture array (2D only) + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + bindings[idx].descriptorCount = 4; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 4; + bindings[idx].pImmutableSamplers = nullptr; + + idx++; + + // Vertex program ucode block + 
bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 5; + bindings[idx].pImmutableSamplers = nullptr; + + m_vertex_instruction_start = bindings[idx].binding; + idx++; + + // Fragment program ucode block + bindings[idx].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + bindings[idx].descriptorCount = 1; + bindings[idx].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + bindings[idx].binding = binding_table.textures_first_bind_slot + 6; + bindings[idx].pImmutableSamplers = nullptr; + + m_fragment_instruction_start = bindings[idx].binding; + idx++; + bindings.resize(idx); + + m_descriptor_pool_sizes = get_descriptor_pool_sizes(bindings); + + std::array push_constants; + push_constants[0].offset = 0; + push_constants[0].size = 16; + push_constants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + + if (vk::emulate_conditional_rendering()) + { + // Conditional render toggle + push_constants[0].size = 20; + } + + const auto set_layout = vk::descriptors::create_layout(bindings); + + VkPipelineLayoutCreateInfo layout_info = {}; + layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + layout_info.setLayoutCount = 1; + layout_info.pSetLayouts = &set_layout; + layout_info.pushConstantRangeCount = 1; + layout_info.pPushConstantRanges = push_constants.data(); + + VkPipelineLayout result; + CHECK_RESULT(vkCreatePipelineLayout(dev, &layout_info, nullptr, &result)); + return { set_layout, result }; + } + + void shader_interpreter::create_descriptor_pools(const vk::render_device& dev) + { + const auto max_draw_calls = dev.get_descriptor_max_draw_calls(); + m_descriptor_pool.create(dev, m_descriptor_pool_sizes, max_draw_calls); + } + void shader_interpreter::init(const vk::render_device& dev) { m_device = dev; + std::tie(m_shared_descriptor_layout, m_shared_pipeline_layout) = create_layout(dev); + 
create_descriptor_pools(dev); } void shader_interpreter::destroy() { m_program_cache.clear(); + m_descriptor_pool.destroy(); + + for (auto &fs : m_shader_cache) + { + fs.second.m_vs->destroy(); + fs.second.m_fs->destroy(); + } + m_shader_cache.clear(); + + if (m_shared_pipeline_layout) + { + vkDestroyPipelineLayout(m_device, m_shared_pipeline_layout, nullptr); + m_shared_pipeline_layout = VK_NULL_HANDLE; + } + + if (m_shared_descriptor_layout) + { + vkDestroyDescriptorSetLayout(m_device, m_shared_descriptor_layout, nullptr); + m_shared_descriptor_layout = VK_NULL_HANDLE; + } } glsl::program* shader_interpreter::link(const vk::pipeline_props& properties, u64 compiler_opt) { - VKVertexProgram* vs; - VKFragmentProgram* fs; - + glsl::shader *fs, *vs; if (auto found = m_shader_cache.find(compiler_opt); found != m_shader_cache.end()) { fs = found->second.m_fs.get(); @@ -323,12 +413,12 @@ namespace vk VkPipelineShaderStageCreateInfo shader_stages[2] = {}; shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; - shader_stages[0].module = vs->shader.get_handle(); + shader_stages[0].module = vs->get_handle(); shader_stages[0].pName = "main"; shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; - shader_stages[1].module = fs->shader.get_handle(); + shader_stages[1].module = fs->get_handle(); shader_stages[1].pName = "main"; std::vector dynamic_state_descriptors = @@ -388,42 +478,33 @@ namespace vk info.stageCount = 2; info.pStages = shader_stages; info.pDynamicState = &dynamic_state_info; - info.layout = VK_NULL_HANDLE; + info.layout = m_shared_pipeline_layout; info.basePipelineIndex = -1; info.basePipelineHandle = VK_NULL_HANDLE; info.renderPass = vk::get_renderpass(m_device, properties.renderpass_key); auto compiler = vk::get_pipe_compiler(); - auto program = compiler->compile( - info, - vk::pipe_compiler::COMPILE_INLINE 
| vk::pipe_compiler::SEPARATE_SHADER_OBJECTS, - {}, - vs->uniforms, - fs->uniforms); - + auto program = compiler->compile(info, m_shared_pipeline_layout, vk::pipe_compiler::COMPILE_INLINE, {}, m_vs_inputs, m_fs_inputs); return program.release(); } - void shader_interpreter::update_fragment_textures(const std::array& sampled_images) + void shader_interpreter::update_fragment_textures(const std::array& sampled_images, vk::descriptor_set &set) { - // FIXME: Cannot use m_fragment_textures.start now since each interpreter has its own binding layout - auto [set, binding] = m_current_interpreter->get_uniform_location(::glsl::glsl_fragment_program, glsl::input_type_texture, "sampler1D_array[16]"); - if (binding == umax) - { - return; - } - const VkDescriptorImageInfo* texture_ptr = sampled_images.data(); - for (u32 i = 0; i < 4; ++i, ++binding, texture_ptr += 16) + for (u32 i = 0, binding = m_fragment_textures_start; i < 4; ++i, ++binding, texture_ptr += 16) { - m_current_interpreter->bind_uniform_array(texture_ptr, 16, set, binding); + set.push(texture_ptr, 16, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, binding); } } + VkDescriptorSet shader_interpreter::allocate_descriptor_set() + { + return m_descriptor_pool.allocate(m_shared_descriptor_layout); + } + glsl::program* shader_interpreter::get( const vk::pipeline_props& properties, - const program_hash_util::fragment_program_utils::fragment_program_metadata& fp_metadata, - const program_hash_util::vertex_program_utils::vertex_program_metadata& vp_metadata, + const program_hash_util::fragment_program_utils::fragment_program_metadata& metadata, u32 vp_ctrl, u32 fp_ctrl) { @@ -463,12 +544,11 @@ namespace vk if (fp_ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_DEPTH_EXPORT; if (fp_ctrl & CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_F32_EXPORT; if (fp_ctrl & RSX_SHADER_CONTROL_USES_KIL) 
key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_KIL; - if (fp_metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; - if (fp_metadata.has_branch_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; - if (fp_metadata.has_pack_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; + if (metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_TEXTURES; + if (metadata.has_branch_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_FLOW_CTRL; + if (metadata.has_pack_instructions) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_PACKING; if (rsx::method_registers.polygon_stipple_enabled()) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_STIPPLING; if (vp_ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_INSTANCING; - if (vp_metadata.referenced_textures_mask) key.compiler_opt |= program_common::interpreter::COMPILER_OPT_ENABLE_VTX_TEXTURES; if (m_current_key == key) [[likely]] { @@ -505,16 +585,4 @@ namespace vk { return m_fragment_instruction_start; } - - std::pair shader_interpreter::get_shaders() const - { - if (auto found = m_shader_cache.find(m_current_key.compiler_opt); found != m_shader_cache.end()) - { - auto fs = found->second.m_fs.get(); - auto vs = found->second.m_vs.get(); - return { vs, fs }; - } - - return { nullptr, nullptr }; - } }; diff --git a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h index 9d934b3ffa..d359ca343e 100644 --- a/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h +++ b/rpcs3/Emu/RSX/VK/VKShaderInterpreter.h @@ -5,9 +5,6 @@ #include "vkutils/descriptors.h" #include -class VKVertexProgram; -class VKFragmentProgram; - namespace vk { using 
::program_hash_util::fragment_program_utils; @@ -15,7 +12,12 @@ namespace vk class shader_interpreter { + std::vector m_vs_inputs; + std::vector m_fs_inputs; + VkDevice m_device = VK_NULL_HANDLE; + VkDescriptorSetLayout m_shared_descriptor_layout = VK_NULL_HANDLE; + VkPipelineLayout m_shared_pipeline_layout = VK_NULL_HANDLE; glsl::program* m_current_interpreter = nullptr; struct pipeline_key @@ -39,12 +41,14 @@ namespace vk struct shader_cache_entry_t { - std::unique_ptr m_fs; - std::unique_ptr m_vs; + std::unique_ptr m_fs; + std::unique_ptr m_vs; }; std::unordered_map, key_hasher> m_program_cache; std::unordered_map m_shader_cache; + rsx::simple_array m_descriptor_pool_sizes; + vk::descriptor_pool m_descriptor_pool; u32 m_vertex_instruction_start = 0; u32 m_fragment_instruction_start = 0; @@ -52,12 +56,12 @@ namespace vk pipeline_key m_current_key{}; - VKVertexProgram* build_vs(u64 compiler_opt); - VKFragmentProgram* build_fs(u64 compiler_opt); - glsl::program* link(const vk::pipeline_props& properties, u64 compiler_opt); + std::pair create_layout(VkDevice dev); + void create_descriptor_pools(const vk::render_device& dev); - u32 init(VKVertexProgram* vk_prog, u64 compiler_opt) const; - u32 init(VKFragmentProgram* vk_prog, u64 compiler_opt) const; + glsl::shader* build_vs(u64 compiler_opt); + glsl::shader* build_fs(u64 compiler_opt); + glsl::program* link(const vk::pipeline_props& properties, u64 compiler_opt); public: void init(const vk::render_device& dev); @@ -65,19 +69,16 @@ namespace vk glsl::program* get( const vk::pipeline_props& properties, - const program_hash_util::fragment_program_utils::fragment_program_metadata& fp_metadata, - const program_hash_util::vertex_program_utils::vertex_program_metadata& vp_metadata, + const program_hash_util::fragment_program_utils::fragment_program_metadata& metadata, u32 vp_ctrl, u32 fp_ctrl); - // Retrieve the shader components that make up the current interpreter - std::pair get_shaders() const; - bool 
is_interpreter(const glsl::program* prog) const; u32 get_vertex_instruction_location() const; u32 get_fragment_instruction_location() const; - void update_fragment_textures(const std::array& sampled_images); + void update_fragment_textures(const std::array& sampled_images, vk::descriptor_set &set); + VkDescriptorSet allocate_descriptor_set(); }; } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp index 70d0972984..60f33f49c5 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp @@ -6,6 +6,7 @@ #include "vkutils/device.h" #include "../Program/GLSLCommon.h" + std::string VKVertexDecompilerThread::getFloatTypeName(usz elementCount) { return glsl::getFloatTypeNameImpl(elementCount); @@ -26,59 +27,14 @@ std::string VKVertexDecompilerThread::compareFunction(COMPARE f, const std::stri return glsl::compareFunctionImpl(f, Op0, Op1, scalar); } -void VKVertexDecompilerThread::prepareBindingTable() -{ - u32 location = 0; - vk_prog->binding_table.vertex_buffers_location = location; - location += 3; // Persistent verts, volatile and layout data - - vk_prog->binding_table.context_buffer_location = location++; - if (m_device_props.emulate_conditional_rendering) - { - vk_prog->binding_table.cr_pred_buffer_location = location++; - } - - std::memset(vk_prog->binding_table.vtex_location, 0xff, sizeof(vk_prog->binding_table.vtex_location)); - - for (const ParamType& PT : m_parr.params[PF_PARAM_UNIFORM]) - { - const bool is_texture_type = PT.type.starts_with("sampler"); - - for (const ParamItem& PI : PT.items) - { - if (is_texture_type) - { - const int id = vk::get_texture_index(PI.name); - vk_prog->binding_table.vtex_location[id] = location++; - continue; - } - - if (PI.name.starts_with("vc[")) - { - if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)) - { - vk_prog->binding_table.cbuf_location = location++; - continue; - } - - vk_prog->binding_table.instanced_lut_buffer_location = location++; - 
vk_prog->binding_table.instanced_cbuf_location = location++; - continue; - } - } - } -} - void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) { - prepareBindingTable(); - OS << "#version 450\n\n" "#extension GL_ARB_separate_shader_objects : enable\n\n"; OS << - "layout(std140, set=0, binding=" << vk_prog->binding_table.context_buffer_location << ") uniform VertexContextBuffer\n" + "layout(std140, set = 0, binding = 0) uniform VertexContextBuffer\n" "{\n" " mat4 scale_offset_mat;\n" " ivec4 user_clip_enabled[2];\n" @@ -89,31 +45,13 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) " float z_far;\n" "};\n\n"; - vk::glsl::program_input context_input = - { - .domain = glsl::glsl_vertex_program, - .type = vk::glsl::input_type_uniform_buffer, - .location = vk_prog->binding_table.context_buffer_location, - .name = "VertexContextBuffer" - }; - inputs.push_back(context_input); - if (m_device_props.emulate_conditional_rendering) { OS << - "layout(std430, set=0, binding=" << vk_prog->binding_table.cr_pred_buffer_location << ") readonly buffer EXT_Conditional_Rendering\n" + "layout(std430, set = 0, binding = 8) readonly buffer EXT_Conditional_Rendering\n" "{\n" " uint conditional_rendering_predicate;\n" "};\n\n"; - - vk::glsl::program_input predicate_input = - { - .domain = glsl::glsl_vertex_program, - .type = vk::glsl::input_type_storage_buffer, - .location = vk_prog->binding_table.cr_pred_buffer_location, - .name = "EXT_Conditional_Rendering" - }; - inputs.push_back(predicate_input); } OS << @@ -125,50 +63,52 @@ void VKVertexDecompilerThread::insertHeader(std::stringstream &OS) " uint layout_ptr_offset;\n" " uint xform_constants_offset;\n"; - u32 push_constants_size = 5 * sizeof(u32); if (m_device_props.emulate_conditional_rendering) { - push_constants_size += sizeof(u32); OS << " uint conditional_rendering_enabled;\n"; } OS << "};\n\n"; - vk::glsl::program_input push_constants = - { - .domain = glsl::glsl_vertex_program, - .type = 
vk::glsl::input_type_push_constant, - .bound_data = vk::glsl::push_constant_ref{ .offset = 0, .size = push_constants_size } - }; - inputs.push_back(push_constants); + vk::glsl::program_input in; + in.location = m_binding_table.vertex_params_bind_slot; + in.domain = glsl::glsl_vertex_program; + in.name = "VertexContextBuffer"; + in.type = vk::glsl::input_type_uniform_buffer; + inputs.push_back(in); } void VKVertexDecompilerThread::insertInputs(std::stringstream& OS, const std::vector& /*inputs*/) { - static const char* input_streams[] = - { - "persistent_input_stream", // Data stream with persistent vertex data (cacheable) - "volatile_input_stream", // Data stream with per-draw data (registers and immediate draw data) - "vertex_layout_stream" // Data stream defining vertex data layout" - }; + OS << "layout(set=0, binding=5) uniform usamplerBuffer persistent_input_stream;\n"; // Data stream with persistent vertex data (cacheable) + OS << "layout(set=0, binding=6) uniform usamplerBuffer volatile_input_stream;\n"; // Data stream with per-draw data (registers and immediate draw data) + OS << "layout(set=0, binding=7) uniform usamplerBuffer vertex_layout_stream;\n"; // Data stream defining vertex data layout - int location = vk_prog->binding_table.vertex_buffers_location; - for (const auto& stream : input_streams) - { - OS << "layout(set=0, binding=" << location << ") uniform usamplerBuffer " << stream << ";\n"; + vk::glsl::program_input in; + in.location = m_binding_table.vertex_buffers_first_bind_slot; + in.domain = glsl::glsl_vertex_program; + in.name = "persistent_input_stream"; + in.type = vk::glsl::input_type_texel_buffer; + this->inputs.push_back(in); - vk::glsl::program_input in; - in.location = location++; - in.domain = glsl::glsl_vertex_program; - in.name = stream; - in.type = vk::glsl::input_type_texel_buffer; - this->inputs.push_back(in); - } + in.location = m_binding_table.vertex_buffers_first_bind_slot + 1; + in.domain = glsl::glsl_vertex_program; + in.name 
= "volatile_input_stream"; + in.type = vk::glsl::input_type_texel_buffer; + this->inputs.push_back(in); + + in.location = m_binding_table.vertex_buffers_first_bind_slot + 2; + in.domain = glsl::glsl_vertex_program; + in.name = "vertex_layout_stream"; + in.type = vk::glsl::input_type_texel_buffer; + this->inputs.push_back(in); } void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std::vector & constants) { vk::glsl::program_input in; + u32 location = m_binding_table.vertex_textures_first_bind_slot; + for (const ParamType &PT : constants) { for (const ParamItem &PI : PT.items) @@ -177,12 +117,12 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std { if (!(m_prog.ctrl & RSX_SHADER_CONTROL_INSTANCED_CONSTANTS)) { - OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.cbuf_location << ") readonly buffer VertexConstantsBuffer\n"; + OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.vertex_constant_buffers_bind_slot) << ") readonly buffer VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 vc[];\n"; OS << "};\n\n"; - in.location = vk_prog->binding_table.cbuf_location; + in.location = m_binding_table.vertex_constant_buffers_bind_slot; in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_storage_buffer; @@ -193,26 +133,26 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std else { // 1. 
Bind indirection lookup buffer - OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.instanced_lut_buffer_location << ") readonly buffer InstancingData\n"; + OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.instancing_lookup_table_bind_slot) << ") readonly buffer InstancingData\n"; OS << "{\n"; OS << " int constants_addressing_lookup[];\n"; OS << "};\n\n"; - in.location = vk_prog->binding_table.instanced_lut_buffer_location; + in.location = m_binding_table.instancing_lookup_table_bind_slot; in.domain = glsl::glsl_vertex_program; in.name = "InstancingData"; in.type = vk::glsl::input_type_storage_buffer; inputs.push_back(in); // 2. Bind actual constants buffer - OS << "layout(std430, set=0, binding=" << vk_prog->binding_table.instanced_cbuf_location << ") readonly buffer VertexConstantsBuffer\n"; + OS << "layout(std430, set=0, binding=" << static_cast(m_binding_table.instancing_constants_buffer_slot) << ") readonly buffer VertexConstantsBuffer\n"; OS << "{\n"; OS << " vec4 instanced_constants_array[];\n"; OS << "};\n\n"; OS << "#define CONSTANTS_ARRAY_LENGTH " << (properties.has_indexed_constants ? 
468 : ::size32(m_constant_ids)) << "\n\n"; - in.location = vk_prog->binding_table.instanced_cbuf_location; + in.location = m_binding_table.instancing_constants_buffer_slot; in.domain = glsl::glsl_vertex_program; in.name = "VertexConstantsBuffer"; in.type = vk::glsl::input_type_storage_buffer; @@ -221,10 +161,12 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std } } - if (PT.type.starts_with("sampler")) + if (PT.type == "sampler2D" || + PT.type == "samplerCube" || + PT.type == "sampler1D" || + PT.type == "sampler3D") { - const int id = vk::get_texture_index(PI.name); - in.location = vk_prog->binding_table.vtex_location[id]; + in.location = location; in.name = PI.name; in.type = vk::glsl::input_type_texture; @@ -248,7 +190,7 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std } } - OS << "layout(set=0, binding=" << in.location << ") uniform " << samplerType << " " << PI.name << ";\n"; + OS << "layout(set = 0, binding=" << location++ << ") uniform " << samplerType << " " << PI.name << ";\n"; } } } @@ -429,6 +371,8 @@ void VKVertexDecompilerThread::insertMainEnd(std::stringstream & OS) void VKVertexDecompilerThread::Task() { m_device_props.emulate_conditional_rendering = vk::emulate_conditional_rendering(); + m_binding_table = vk::g_render_device->get_pipeline_binding_table(); + m_shader = Decompile(); vk_prog->SetInputs(inputs); } diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.h b/rpcs3/Emu/RSX/VK/VKVertexProgram.h index 3422333fc6..1bb6dfd91c 100644 --- a/rpcs3/Emu/RSX/VK/VKVertexProgram.h +++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.h @@ -15,6 +15,7 @@ struct VKVertexDecompilerThread : public VertexProgramDecompiler std::string &m_shader; std::vector inputs; class VKVertexProgram *vk_prog; + vk::pipeline_binding_table m_binding_table{}; struct { @@ -35,8 +36,6 @@ protected: void insertMainStart(std::stringstream &OS) override; void insertMainEnd(std::stringstream &OS) override; - void prepareBindingTable(); - 
const RSXVertexProgram &rsx_vertex_program; public: VKVertexDecompilerThread(const RSXVertexProgram &prog, std::string& shader, ParamArray&, class VKVertexProgram &dst) @@ -62,19 +61,6 @@ public: vk::glsl::shader shader; std::vector uniforms; - // Quick attribute indices - struct - { - u32 context_buffer_location = umax; // Vertex program context - u32 cr_pred_buffer_location = umax; // Conditional rendering predicate - u32 vertex_buffers_location = umax; // Vertex input streams (3) - u32 cbuf_location = umax; // Vertex program constants register file - u32 instanced_lut_buffer_location = umax; // Instancing redirection table - u32 instanced_cbuf_location = umax; // Instancing constants register file - u32 vtex_location[4]; // Vertex textures (inf) - - } binding_table; - void Decompile(const RSXVertexProgram& prog); void Compile(); void SetInputs(std::vector& inputs); diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp index dc6562289e..c256070490 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr1/fsr_pass.cpp @@ -61,9 +61,6 @@ namespace vk // Fill with 0 to avoid sending incomplete/unused variables to the GPU memset(m_constants_buf, 0, sizeof(m_constants_buf)); - // No ssbo usage - ssbo_count = 0; - // Enable push constants use_push_constants = true; push_constants_size = push_constants_size_; @@ -71,33 +68,39 @@ namespace vk create(); } - std::vector fsr_pass::get_inputs() + std::vector> fsr_pass::get_descriptor_layout() + { + return + { + { VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1 }, + { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1 } + }; + } + + void fsr_pass::declare_inputs() { std::vector inputs = { - glsl::program_input::make( + { ::glsl::program_domain::glsl_compute_program, - "InputTexture", - vk::glsl::input_type_texture, + vk::glsl::program_input_type::input_type_texture, + {}, {}, 0, - 0 - ), - - glsl::program_input::make( + "InputTexture" + }, + { 
::glsl::program_domain::glsl_compute_program, - "OutputTexture", - vk::glsl::input_type_storage_texture, - 0, - 1 - ), + vk::glsl::program_input_type::input_type_texture, + {}, {}, + 1, + "OutputTexture" + } }; - auto result = compute_task::get_inputs(); - result.insert(result.end(), inputs.begin(), inputs.end()); - return result; + m_program->load_uniforms(inputs); } - void fsr_pass::bind_resources(const vk::command_buffer& /*cmd*/) + void fsr_pass::bind_resources() { // Bind relevant stuff if (!m_sampler) @@ -108,8 +111,8 @@ namespace vk VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_LINEAR, VK_FILTER_LINEAR, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK); } - m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, 0, 0); - m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, m_output_image->image()->current_layout }, 0, 1); + m_program->bind_uniform({ m_sampler->value, m_input_image->value, m_input_image->image()->current_layout }, "InputTexture", VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, m_descriptor_set); + m_program->bind_uniform({ VK_NULL_HANDLE, m_output_image->value, m_output_image->image()->current_layout }, "OutputTexture", VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, m_descriptor_set); } void fsr_pass::run(const vk::command_buffer& cmd, vk::viewable_image* src, vk::viewable_image* dst, const size2u& input_size, const size2u& output_size) @@ -119,11 +122,6 @@ namespace vk m_input_size = input_size; m_output_size = output_size; - if (!m_program) - { - load_program(cmd); - } - configure(cmd); constexpr auto wg_size = 16; @@ -160,7 +158,7 @@ namespace vk static_cast(src_image->width()), static_cast(src_image->height()), // Size of the raw image to upscale (in case viewport does not cover it all) static_cast(m_output_size.width), static_cast(m_output_size.height)); // Size of output viewport (target size) - vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, 
push_constants_size, m_constants_buf); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); } rcas_pass::rcas_pass() @@ -179,7 +177,7 @@ namespace vk auto cas_attenuation = 2.f - (g_cfg.video.vk.rcas_sharpening_intensity / 50.f); FsrRcasCon(&m_constants_buf[0], cas_attenuation); - vkCmdPushConstants(cmd, m_program->layout(), VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); + vkCmdPushConstants(cmd, m_pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constants_size, m_constants_buf); } } // Namespace FidelityFX diff --git a/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h b/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h index 7bff58b049..c5b5b30e73 100644 --- a/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h +++ b/rpcs3/Emu/RSX/VK/upscalers/fsr_pass.h @@ -19,8 +19,9 @@ namespace vk size2u m_output_size; u32 m_constants_buf[20]; - std::vector get_inputs() override; - void bind_resources(const vk::command_buffer&) override; + std::vector> get_descriptor_layout() override; + void declare_inputs() override; + void bind_resources() override; virtual void configure(const vk::command_buffer& cmd) = 0; diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp index b5e62f33f6..7293180e08 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.cpp @@ -14,43 +14,44 @@ namespace vk public: inline void flush_all() { - std::lock_guard lock(m_notifications_lock); - for (auto& set : m_notification_list) { set->flush(); } - - m_notification_list.clear(); } void register_(descriptor_set* set) { - std::lock_guard lock(m_notifications_lock); + // Rare event, upon creation of a new set tracker. 
+ // Check for spurious 'new' events when the aux context is taking over + for (const auto& set_ : m_notification_list) + { + if (set_ == set) return; + } m_notification_list.push_back(set); - // rsx_log.notice("[descriptor_manager::register] Now monitoring %u descriptor sets", m_notification_list.size()); + rsx_log.warning("[descriptor_manager::register] Now monitoring %u descriptor sets", m_notification_list.size()); } void deregister(descriptor_set* set) { - std::lock_guard lock(m_notifications_lock); + for (auto it = m_notification_list.begin(); it != m_notification_list.end(); ++it) + { + if (*it == set) + { + *it = m_notification_list.back(); + m_notification_list.pop_back(); + break; + } + } - m_notification_list.erase_if(FN(x == set)); - // rsx_log.notice("[descriptor_manager::deregister] Now monitoring %u descriptor sets", m_notification_list.size()); - } - - void destroy() - { - std::lock_guard lock(m_notifications_lock); - m_notification_list.clear(); + rsx_log.warning("[descriptor_manager::deregister] Now monitoring %u descriptor sets", m_notification_list.size()); } dispatch_manager() = default; private: rsx::simple_array m_notification_list; - std::mutex m_notifications_lock; dispatch_manager(const dispatch_manager&) = delete; dispatch_manager& operator = (const dispatch_manager&) = delete; @@ -66,11 +67,6 @@ namespace vk g_fxo->get().flush_all(); } - void destroy() - { - g_fxo->get().destroy(); - } - VkDescriptorSetLayout create_layout(const rsx::simple_array& bindings) { VkDescriptorSetLayoutCreateInfo infos = {}; @@ -92,17 +88,17 @@ namespace vk } else { - binding_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT; + binding_flags[i] = VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT; } } - binding_infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; + binding_infos.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; binding_infos.pNext = nullptr; binding_infos.bindingCount = 
::size32(binding_flags); binding_infos.pBindingFlags = binding_flags.data(); infos.pNext = &binding_infos; - infos.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT; + infos.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT; VkDescriptorSetLayout result; CHECK_RESULT(vkCreateDescriptorSetLayout(*g_render_device, &infos, nullptr, &result)); @@ -299,6 +295,11 @@ namespace vk m_in_use = true; m_update_after_bind_mask = g_render_device->get_descriptor_update_after_bind_support(); + + if (m_update_after_bind_mask) + { + g_fxo->get().register_(this); + } } else if (m_push_type_mask & ~m_update_after_bind_mask) { @@ -332,6 +333,11 @@ namespace vk return &m_handle; } + VkDescriptorSet descriptor_set::value() const + { + return m_handle; + } + void descriptor_set::push(const VkBufferView& buffer_view, VkDescriptorType type, u32 binding) { m_push_type_mask |= (1ull << type); @@ -411,24 +417,14 @@ namespace vk if (m_pending_copies.empty()) [[likely]] { m_pending_copies = std::move(copy_cmd); - return; } - - m_pending_copies += copy_cmd; - } - - void descriptor_set::push(rsx::simple_array& write_cmds, u32 type_mask) - { - m_push_type_mask |= type_mask; - -#if !defined(__clang__) || (__clang_major__ >= 16) - if (m_pending_writes.empty()) [[unlikely]] + else { - m_pending_writes = std::move(write_cmds); - return; + const auto old_size = m_pending_copies.size(); + const auto new_size = copy_cmd.size() + old_size; + m_pending_copies.resize(new_size); + std::copy(copy_cmd.begin(), copy_cmd.end(), m_pending_copies.begin() + old_size); } -#endif - m_pending_writes += write_cmds; } void descriptor_set::push(const descriptor_set_dynamic_offset_t& offset) @@ -442,31 +438,12 @@ namespace vk m_dynamic_offsets[offset.location] = offset.value; } - void descriptor_set::on_bind() - { - if (!m_push_type_mask) - { - ensure(m_pending_writes.empty()); - return; - } - - // We have queued writes - if ((m_push_type_mask & ~m_update_after_bind_mask) || - 
(m_pending_writes.size() >= max_cache_size)) - { - flush(); - return; - } - - // Register for async flush - ensure(m_update_after_bind_mask); - g_fxo->get().register_(this); - } - void descriptor_set::bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout) { - // Notify - on_bind(); + if ((m_push_type_mask & ~m_update_after_bind_mask) || (m_pending_writes.size() >= max_cache_size)) + { + flush(); + } vkCmdBindDescriptorSets(cmd, bind_point, layout, 0, 1, &m_handle, ::size32(m_dynamic_offsets), m_dynamic_offsets.data()); } diff --git a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h index c2cf5deb20..6c61488b6e 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/descriptors.h +++ b/rpcs3/Emu/RSX/VK/vkutils/descriptors.h @@ -94,19 +94,15 @@ namespace vk void swap(descriptor_set& other); descriptor_set& operator = (VkDescriptorSet set); - VkDescriptorSet value() const { return m_handle; } - operator bool() const { return m_handle != VK_NULL_HANDLE; } - VkDescriptorSet* ptr(); + VkDescriptorSet value() const; void push(const VkBufferView& buffer_view, VkDescriptorType type, u32 binding); void push(const VkDescriptorBufferInfo& buffer_info, VkDescriptorType type, u32 binding); void push(const VkDescriptorImageInfo& image_info, VkDescriptorType type, u32 binding); void push(const VkDescriptorImageInfo* image_info, u32 count, VkDescriptorType type, u32 binding); void push(rsx::simple_array& copy_cmd, u32 type_mask = umax); - void push(rsx::simple_array& write_cmds, u32 type_mask = umax); void push(const descriptor_set_dynamic_offset_t& offset); - void on_bind(); void bind(const vk::command_buffer& cmd, VkPipelineBindPoint bind_point, VkPipelineLayout layout); void flush(); @@ -122,7 +118,7 @@ namespace vk rsx::simple_array m_image_info_pool; rsx::simple_array m_dynamic_offsets; -#if defined(__clang__) && (__clang_major__ < 16) +#ifdef __clang__ // Clang (pre 16.x) does not support LWG 2089, std::construct_at for POD 
types struct WriteDescriptorSetT : public VkWriteDescriptorSet { @@ -162,7 +158,6 @@ namespace vk { void init(); void flush(); - void destroy(); VkDescriptorSetLayout create_layout(const rsx::simple_array& bindings); } diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp index 907d692e85..85d9148834 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp +++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp @@ -813,6 +813,7 @@ namespace vk memory_map = vk::get_memory_mapping(pdev); m_formats_support = vk::get_optimal_tiling_supported_formats(pdev); + m_pipeline_binding_table = vk::get_pipeline_binding_table(pdev); if (g_cfg.video.disable_vulkan_mem_allocator) { @@ -1147,4 +1148,15 @@ namespace vk return result; } + + pipeline_binding_table get_pipeline_binding_table(const vk::physical_device& dev) + { + pipeline_binding_table result{}; + + // Need to check how many samplers are supported by the driver + const auto usable_samplers = std::min(dev.get_limits().maxPerStageDescriptorSampledImages, 32u); + result.vertex_textures_first_bind_slot = result.textures_first_bind_slot + usable_samplers; + result.total_descriptor_bindings = result.vertex_textures_first_bind_slot + 4; + return result; + } } diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.h b/rpcs3/Emu/RSX/VK/vkutils/device.h index 0511802aac..63e30d3d42 100644 --- a/rpcs3/Emu/RSX/VK/vkutils/device.h +++ b/rpcs3/Emu/RSX/VK/vkutils/device.h @@ -137,6 +137,7 @@ namespace vk physical_device* pgpu = nullptr; memory_type_mapping memory_map{}; gpu_formats_support m_formats_support{}; + pipeline_binding_table m_pipeline_binding_table{}; std::unique_ptr m_allocator; VkDevice dev = VK_NULL_HANDLE; @@ -167,6 +168,7 @@ namespace vk const physical_device& gpu() const { return *pgpu; } const memory_type_mapping& get_memory_mapping() const { return memory_map; } const gpu_formats_support& get_formats_support() const { return m_formats_support; } + const pipeline_binding_table& get_pipeline_binding_table() const { 
return m_pipeline_binding_table; } const gpu_shader_types_support& get_shader_types_support() const { return pgpu->shader_types_support; } const custom_border_color_features& get_custom_border_color_support() const { return pgpu->custom_border_color_support; } const multidraw_features get_multidraw_support() const { return pgpu->multidraw_support; } @@ -204,6 +206,7 @@ namespace vk memory_type_mapping get_memory_mapping(const physical_device& dev); gpu_formats_support get_optimal_tiling_supported_formats(const physical_device& dev); + pipeline_binding_table get_pipeline_binding_table(const physical_device& dev); extern const render_device* g_render_device; } diff --git a/rpcs3/tests/test_simple_array.cpp b/rpcs3/tests/test_simple_array.cpp index 916284a6cd..f64e01200e 100644 --- a/rpcs3/tests/test_simple_array.cpp +++ b/rpcs3/tests/test_simple_array.cpp @@ -189,29 +189,4 @@ namespace rsx EXPECT_EQ(arr[i], i + 1); } } - - TEST(SimpleArray, Merge) - { - rsx::simple_array arr{ 1 }; - rsx::simple_array arr2{ 2, 3, 4, 5, 6, 7, 8, 9 }; - rsx::simple_array arr3{ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 }; - - // Check small vector optimization - EXPECT_TRUE(arr.is_local_storage()); - - // Small vector optimization holds after append - arr += arr2; - EXPECT_TRUE(arr.is_local_storage()); - - // Exceed the boundary and we move into dynamic alloc - arr += arr3; - EXPECT_FALSE(arr.is_local_storage()); - - // Verify contents - EXPECT_EQ(arr.size(), 30); - for (int i = 0; i < 30; ++i) - { - EXPECT_EQ(arr[i], i + 1); - } - } }