From 943752db305453e92c102b0bd5243b0afdbbc6d9 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sat, 28 May 2022 19:38:29 +0300 Subject: [PATCH] gl: Compute optimizations - Keep buffers around longer to allow driver heuristics to work - Properly initialize the shaders to allow optimal workgroup dispatch size --- rpcs3/Emu/RSX/GL/GLCompute.cpp | 7 +++++++ rpcs3/Emu/RSX/GL/GLCompute.h | 3 +++ rpcs3/Emu/RSX/GL/GLTexture.cpp | 27 +++++++++++++-------------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLCompute.cpp b/rpcs3/Emu/RSX/GL/GLCompute.cpp index 091298a115..d88b66cdce 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.cpp +++ b/rpcs3/Emu/RSX/GL/GLCompute.cpp @@ -24,10 +24,17 @@ namespace gl optimal_kernel_size = 256 / optimal_group_size; glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, reinterpret_cast(&max_invocations_x)); + + initialized = true; } void compute_task::create() { + if (!initialized) + { + initialize(); + } + if (!compiled) { m_shader.create(::glsl::program_domain::glsl_compute_program, m_src); diff --git a/rpcs3/Emu/RSX/GL/GLCompute.h b/rpcs3/Emu/RSX/GL/GLCompute.h index 06708b09e3..4021722ee3 100644 --- a/rpcs3/Emu/RSX/GL/GLCompute.h +++ b/rpcs3/Emu/RSX/GL/GLCompute.h @@ -14,6 +14,7 @@ namespace gl gl::glsl::shader m_shader; gl::glsl::program m_program; bool compiled = false; + bool initialized = false; // Device-specific options bool unroll_loops = true; @@ -260,6 +261,8 @@ namespace gl { ensure((sizeof(_BlockType) & 3) == 0); // "Unsupported block type" + initialize(); + m_src = #include "../Program/GLSLSnippets/GPUDeswizzle.glsl" ; diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index a453e815fb..f4bea436ae 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -18,19 +18,20 @@ namespace gl buffer g_typeless_transfer_buffer; buffer g_upload_transfer_buffer; buffer g_compute_decode_buffer; + buffer g_deswizzle_scratch_buffer; std::pair prepare_compute_resources(usz 
staging_data_length) { if (g_upload_transfer_buffer.size() < static_cast(staging_data_length)) { g_upload_transfer_buffer.remove(); - g_upload_transfer_buffer.create(staging_data_length, nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW); + g_upload_transfer_buffer.create(gl::buffer::target::pixel_unpack, staging_data_length, nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW); } if (g_compute_decode_buffer.size() < static_cast(staging_data_length) * 3) { g_compute_decode_buffer.remove(); - g_compute_decode_buffer.create(std::max(512, staging_data_length * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY); + g_compute_decode_buffer.create(gl::buffer::target::pixel_pack, std::max(512, staging_data_length * 3), nullptr, buffer::memory_type::local, GL_STATIC_COPY); } return { &g_upload_transfer_buffer, &g_compute_decode_buffer }; @@ -41,6 +42,7 @@ namespace gl g_typeless_transfer_buffer.remove(); g_upload_transfer_buffer.remove(); g_compute_decode_buffer.remove(); + g_deswizzle_scratch_buffer.remove(); } template @@ -727,8 +729,6 @@ namespace gl u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); u64 image_linear_size; - gl::buffer deswizzle_buf; - switch (gl_type) { case GL_BYTE: @@ -792,13 +792,14 @@ namespace gl else { // 2.1 Copy data to deswizzle buf - if (deswizzle_buf.size() < image_linear_size) + if (g_deswizzle_scratch_buffer.size() < image_linear_size) { - deswizzle_buf.remove(); - deswizzle_buf.create(gl::buffer::target::ssbo, image_linear_size, nullptr, gl::buffer::memory_type::local); + g_deswizzle_scratch_buffer.remove(); + g_deswizzle_scratch_buffer.create(gl::buffer::target::ssbo, image_linear_size, nullptr, gl::buffer::memory_type::local); + rsx_log.error("DESWZ BUF @0x%x", g_deswizzle_scratch_buffer.id()); } - upload_scratch_mem->copy_to(&deswizzle_buf, 0, 0, image_linear_size); + upload_scratch_mem->copy_to(&g_deswizzle_scratch_buffer, 0, 0, image_linear_size); // 2.2 Apply compute transform to deswizzle 
input and dump it in compute_scratch_mem ensure(op.element_size == 2 || op.element_size == 4); @@ -810,22 +811,22 @@ namespace gl if (op.element_size == 4) [[ likely ]] { - do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth); + do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth); } else { - do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth); + do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth); } } else { if (op.element_size == 4) [[ likely ]] { - do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth); + do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth); } else { - do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &deswizzle_buf, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth); + do_deswizzle_transformation(cmd, block_size, compute_scratch_mem, &g_deswizzle_scratch_buffer, image_linear_size, layout.width_in_texel, layout.height_in_texel, layout.depth); } } } @@ -849,8 +850,6 @@ namespace gl dst->copy_from(out_pointer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); } } - - deswizzle_buf.remove(); } }