rsx: Minor optimization

- Defer resolving image copy operations to the binding step
This commit is contained in:
kd-11 2017-11-01 15:34:38 +03:00
parent bbcb6b6851
commit eed55a446c
5 changed files with 147 additions and 23 deletions

View file

@ -213,9 +213,28 @@ namespace rsx
u32 address_range = 0;
};
//Describes a rectangular region of an existing image whose copy into a
//temporary texture is postponed until a renderer asks for it
struct deferred_subresource
{
	//Source image of the postponed copy; renderers test this for non-zero before resolving
	image_resource_type external_handle = 0;
	//Format value forwarded unchanged to create_temporary_subresource_view
	u32 gcm_format = 0;
	//Top-left corner of the region inside the source image
	u16 x = 0;
	u16 y = 0;
	//Extent of the region
	u16 width = 0;
	u16 height = 0;

	//Empty descriptor: every field keeps its zero default
	deferred_subresource() = default;

	//Fully specified descriptor
	deferred_subresource(image_resource_type _res, u32 _fmt, u16 _x, u16 _y, u16 _w, u16 _h)
		: external_handle(_res)
		, gcm_format(_fmt)
		, x(_x)
		, y(_y)
		, width(_w)
		, height(_h)
	{
	}
};
//Texture lookup result handed to the renderers. It carries either a ready image view
//(image_handle) or a description of a copy that still has to be performed
//(external_subresource_desc), on top of the fields inherited from sampled_image_descriptor_base.
struct sampled_image_descriptor : public sampled_image_descriptor_base
{
//View to bind directly; stays 0 when the image is described by external_subresource_desc instead
image_view_type image_handle = 0;
//Postponed copy description; its external_handle is non-zero only for deferred results
deferred_subresource external_subresource_desc;
//Bookkeeping bit for the renderer: the GL binding loop sets it after processing this
//descriptor and skips descriptors that already have it set
bool flag = false;
//Empty descriptor: no view and no deferred copy
sampled_image_descriptor()
{}
@ -228,6 +247,18 @@ namespace rsx
scale_x = x_scale;
scale_y = y_scale;
}
//Deferred variant: records the source image, format and region without creating a view.
//image_handle is left at 0 so binding code falls through to external_subresource_desc.
sampled_image_descriptor(image_resource_type external_handle, u32 gcm_format, u16 x_offset, u16 y_offset, u16 width, u16 height,
const texture_upload_context ctx, const bool is_depth, const f32 x_scale, const f32 y_scale)
{
external_subresource_desc = {external_handle, gcm_format, x_offset, y_offset, width, height};
image_handle = 0;
//The remaining assignments target members not declared in this struct (presumably inherited from the base — confirm)
upload_context = ctx;
is_depth_texture = is_depth;
scale_x = x_scale;
scale_y = y_scale;
}
};
private:
@ -1001,7 +1032,7 @@ namespace rsx
{
const auto w = rsx::apply_resolution_scale(internal_width, true);
const auto h = rsx::apply_resolution_scale(internal_height, true);
return{ create_temporary_subresource_view(cmd, texptr, format, 0, 0, w, h), texture_upload_context::framebuffer_storage,
return{ texptr->get_surface(), format, 0, 0, w, h, texture_upload_context::framebuffer_storage,
false, get_internal_scaling_x(texptr), get_internal_scaling_y(texptr) };
}
@ -1044,7 +1075,7 @@ namespace rsx
{
const auto w = rsx::apply_resolution_scale(internal_width, true);
const auto h = rsx::apply_resolution_scale(internal_height, true);
return{ create_temporary_subresource_view(cmd, texptr, format, 0, 0, w, h), texture_upload_context::framebuffer_storage,
return{ texptr->get_surface(), format, 0, 0, w, h, texture_upload_context::framebuffer_storage,
true, get_internal_scaling_x(texptr), get_internal_scaling_y(texptr) };
}
@ -1116,15 +1147,15 @@ namespace rsx
return{ rsc.surface->get_view(), texture_upload_context::framebuffer_storage, rsc.is_depth_surface, get_internal_scaling_x(rsc.surface), get_internal_scaling_y(rsc.surface) };
}
else return{ create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)), texture_upload_context::framebuffer_storage,
else return{ rsc.surface->get_surface(), format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true), texture_upload_context::framebuffer_storage,
rsc.is_depth_surface, get_internal_scaling_x(rsc.surface), get_internal_scaling_y(rsc.surface) };
}
else
{
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
return{ create_temporary_subresource_view(cmd, rsc.surface, format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true)), texture_upload_context::framebuffer_storage,
return{ rsc.surface->get_surface(), format, rsx::apply_resolution_scale(rsc.x, false), rsx::apply_resolution_scale(rsc.y, false),
rsx::apply_resolution_scale(rsc.w, true), rsx::apply_resolution_scale(rsc.h, true), texture_upload_context::framebuffer_storage,
rsc.is_depth_surface, get_internal_scaling_x(rsc.surface), get_internal_scaling_y(rsc.surface) };
}
}
@ -1180,8 +1211,7 @@ namespace rsx
}
auto src_image = surface->get_raw_texture();
if (auto result = create_temporary_subresource_view(cmd, &src_image, format, offset_x, offset_y, tex_width, tex_height))
return{ result, texture_upload_context::blit_engine_dst, surface->is_depth_texture(), 1.f, 1.f };
return{ src_image, format, offset_x, offset_y, tex_width, tex_height, texture_upload_context::blit_engine_dst, surface->is_depth_texture(), 1.f, 1.f };
}
}
}

View file

@ -377,13 +377,8 @@ void GLGSRender::end()
if (rsx::method_registers.fragment_textures[i].enabled())
{
glActiveTexture(GL_TEXTURE0 + i);
*sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.fragment_textures[i], m_rtts);
m_gl_sampler_states[i].apply(rsx::method_registers.fragment_textures[i]);
GLenum target = get_gl_target_for_texture(rsx::method_registers.fragment_textures[i]);
glBindTexture(target, sampler_state->image_handle);
}
else
{
@ -406,11 +401,7 @@ void GLGSRender::end()
if (rsx::method_registers.vertex_textures[i].enabled())
{
const int texture_index = i + rsx::limits::fragment_textures_count;
glActiveTexture(GL_TEXTURE0 + texture_index);
*sampler_state = m_gl_texture_cache.upload_texture(unused, rsx::method_registers.vertex_textures[i], m_rtts);
glBindTexture(GL_TEXTURE_2D, static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get())->image_handle);
}
else
*sampler_state = {};
@ -444,6 +435,72 @@ void GLGSRender::end()
//glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
}
//Bind textures and resolve external copy operations
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
int unused_location;
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
{
if (m_program->uniforms.has_location("tex" + std::to_string(i), &unused_location))
{
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (sampler_state->flag)
continue;
sampler_state->flag = true;
auto &tex = rsx::method_registers.fragment_textures[i];
glActiveTexture(GL_TEXTURE0 + i);
GLenum target = get_gl_target_for_texture(tex);
if (sampler_state->image_handle)
{
glBindTexture(target, sampler_state->image_handle);
}
else if (sampler_state->external_subresource_desc.external_handle)
{
glBindTexture(target, m_gl_texture_cache.create_temporary_subresource(sampler_state->external_subresource_desc));
m_textures_dirty[i] = true;
}
else
{
glBindTexture(target, GL_NONE);
}
}
}
for (int i = 0; i < rsx::limits::vertex_textures_count; ++i)
{
if (m_program->uniforms.has_location("vtex" + std::to_string(i), &unused_location))
{
auto sampler_state = static_cast<gl::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
if (sampler_state->flag)
continue;
sampler_state->flag = true;
glActiveTexture(GL_TEXTURE0 + rsx::limits::fragment_textures_count + i);
if (sampler_state->image_handle)
{
glBindTexture(GL_TEXTURE_2D, sampler_state->image_handle);
}
else if (sampler_state->external_subresource_desc.external_handle)
{
glBindTexture(GL_TEXTURE_2D, m_gl_texture_cache.create_temporary_subresource(sampler_state->external_subresource_desc));
m_vertex_textures_dirty[i] = true;
}
else
{
glBindTexture(GL_TEXTURE_2D, GL_NONE);
}
}
}
std::chrono::time_point<steady_clock> textures_end = steady_clock::now();
m_textures_upload_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
//Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [](gl::render_target *surface)
@ -1208,6 +1265,7 @@ void GLGSRender::flip(int buffer)
// Cleanup
m_gl_texture_cache.on_frame_end();
m_samplers_dirty.store(true);
for (auto &tex : m_rtts.invalidated_resources)
tex->remove();

View file

@ -742,6 +742,12 @@ namespace gl
m_hw_blitter.destroy();
}
//Resolves a deferred subresource description by forwarding its fields to
//create_temporary_subresource_view; the GL renderer passes the result to glBindTexture
inline u32 create_temporary_subresource(deferred_subresource& desc)
{
	//Null placeholder for the first argument (the Vulkan counterpart passes its command buffer in this position)
	void* null_cmd = nullptr;

	return create_temporary_subresource_view(
		null_cmd,
		&desc.external_handle,
		desc.gcm_format,
		desc.x, desc.y,
		desc.width, desc.height);
}
bool is_depth_texture(const u32 rsx_address, const u32 rsx_size) override
{
reader_lock lock(m_cache_mutex);

View file

@ -1259,14 +1259,22 @@ void VKGSRender::end()
}
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(fs_sampler_state[i].get());
if (!sampler_state->image_handle)
auto image_ptr = sampler_state->image_handle;
if (!image_ptr && sampler_state->external_subresource_desc.external_handle)
{
image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
m_textures_dirty[i] = true;
}
if (!image_ptr)
{
LOG_ERROR(RSX, "Texture upload failed to texture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}
m_program->bind_uniform({ fs_sampler_handles[i]->value, sampler_state->image_handle->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
m_program->bind_uniform({ fs_sampler_handles[i]->value, image_ptr->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "tex" + std::to_string(i), m_current_frame->descriptor_set);
}
}
@ -1281,14 +1289,22 @@ void VKGSRender::end()
}
auto sampler_state = static_cast<vk::texture_cache::sampled_image_descriptor*>(vs_sampler_state[i].get());
if (!sampler_state->image_handle)
auto image_ptr = sampler_state->image_handle;
if (!image_ptr && sampler_state->external_subresource_desc.external_handle)
{
image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc);
m_vertex_textures_dirty[i] = true;
}
if (!image_ptr)
{
LOG_ERROR(RSX, "Texture upload failed to vtexture index %d. Binding null sampler.", i);
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
continue;
}
m_program->bind_uniform({ vs_sampler_handles[i]->value, sampler_state->image_handle->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, "vtex" + std::to_string(i), m_current_frame->descriptor_set);
}
}
@ -1707,6 +1723,7 @@ void VKGSRender::advance_queued_frames()
//texture cache is also double buffered to prevent use-after-free
m_texture_cache.on_frame_end();
m_samplers_dirty.store(true);
//Remove stale framebuffers. Ref counted to prevent use-after-free
m_framebuffers_to_clean.remove_if([](std::unique_ptr<vk::framebuffer_holder>& fbo)
@ -2796,8 +2813,10 @@ void VKGSRender::flip(int buffer)
auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 126, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Texture memory: " + std::to_string(texture_memory_size) + "M");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M");
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres);
m_framebuffers_to_clean.push_back(std::move(direct_fbo));

View file

@ -365,6 +365,7 @@ namespace vk
//Memory held by this temp storage object
u32 block_size = 0;
//Frame id tag
const u64 frame_tag = vk::get_current_frame_id();
discarded_storage(std::unique_ptr<vk::image_view>& _view)
@ -718,6 +719,11 @@ namespace vk
purge_cache();
}
//Forwards a deferred subresource description, together with the supplied command buffer,
//to create_temporary_subresource_view and returns the resulting view
inline vk::image_view* create_temporary_subresource(vk::command_buffer &cmd, deferred_subresource& desc)
{
	vk::image_view* resolved_view = create_temporary_subresource_view(
		cmd,
		desc.external_handle,
		desc.gcm_format,
		desc.x, desc.y,
		desc.width, desc.height);

	return resolved_view;
}
bool is_depth_texture(const u32 rsx_address, const u32 rsx_size) override
{
reader_lock lock(m_cache_mutex);
@ -829,7 +835,12 @@ namespace vk
//Reports the texture memory currently accounted to this cache.
//NOTE(review): two return statements appear back to back here, which looks like the
//before/after lines of the change shown together; as written only the first one executes.
//Confirm that the intended result is m_texture_memory_in_use alone, with
//m_discarded_memory_size reported separately through get_temporary_memory_in_use().
const u32 get_texture_memory_in_use() const override
{
return m_texture_memory_in_use + m_discarded_memory_size;
return m_texture_memory_in_use;
}
//Returns m_discarded_memory_size so it can be reported separately from
//get_texture_memory_in_use().
//Const-qualified because it only reads state; this matches get_texture_memory_in_use()
//and lets the getter be called through a const reference to the cache.
const u32 get_temporary_memory_in_use() const
{
	return m_discarded_memory_size;
}
};
}