rsx: Texture cache fixes - Optionally attempt to merge framebuffers into an atlas if partial resources are missing - Support for data update requests to the temporary subresource handler. This is useful for framebuffer feedback loops where a new copy is needed after every draw call (the resource is always dirty).

This commit is contained in:
kd-11 2018-02-21 13:46:23 +03:00
parent 4487cc8e7a
commit 01349b8cee
4 changed files with 272 additions and 35 deletions

View file

@ -251,10 +251,22 @@ namespace rsx
u32 address_range = 0;
};
// Describes one rectangular region to copy from a source image into a destination image.
// Instances are collected in deferred_subresource::sections_to_copy and passed to
// generate_atlas_from_images().
// NOTE: Callers fill this in with positional brace-initialization, so the member order
// below is part of the contract - do not reorder or insert members in the middle.
// NOTE: The producer visible in this file passes every coordinate and extent through
// rsx::apply_resolution_scale() before storing it here.
struct copy_region_descriptor
{
image_resource_type src; // Image the region is read from
u16 src_x; // X offset of the region inside src
u16 src_y; // Y offset of the region inside src
u16 dst_x; // X offset of the region inside the destination (presumably the atlas being built)
u16 dst_y; // Y offset of the region inside the destination
u16 w; // Region width
u16 h; // Region height
};
struct deferred_subresource
{
image_resource_type external_handle = 0;
std::array<image_resource_type, 6> external_cubemap_sources;
std::vector<copy_region_descriptor> sections_to_copy;
u32 base_address = 0;
u32 gcm_format = 0;
u16 x = 0;
@ -262,6 +274,8 @@ namespace rsx
u16 width = 0;
u16 height = 0;
bool is_cubemap = false;
bool is_copy_cmd = false;
bool update_cached = false;
deferred_subresource()
{}
@ -370,6 +384,8 @@ namespace rsx
// Backend hook: receives a cached section and a remap vector (a pair of 4-byte arrays).
// The effect is defined entirely by the implementing backend.
virtual void set_up_remap_vector(section_storage_type& section, const std::pair<std::array<u8, 4>, std::array<u8, 4>>& remap_vector) = 0;
// Backend hook operating on 'tex' using the given command buffer.
// NOTE(review): presumably makes prior writes to the image visible before it is sampled - confirm against the backends.
virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex) = 0;
// Backend hook: returns a view built from six source images.
// The temporary-subresource path calls it for descriptors flagged is_cubemap, passing the descriptor width as 'size'.
virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::array<image_resource_type, 6>& sources) = 0;
// Backend hook: returns a view of the requested width/height built from the listed copy regions.
// The temporary-subresource path calls it for descriptors flagged is_copy_cmd.
virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy) = 0;
// Backend hook: invoked when an already cached temporary subresource is reused and the descriptor has update_cached set.
// 'dst' is the cached view and 'src' is the descriptor's external_handle.
virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
// Size of one address block: 0x1000000 (1 << 24).
constexpr u32 get_block_size() const
{
	return 1u << 24;
}
// Rounds 'address' down to the start of the block that contains it
// by clearing the low bits covered by one block (get_block_size() - 1).
inline u32 get_block_address(u32 address) const
{
	const u32 offset_mask = get_block_size() - 1;
	return address & ~offset_mask;
}
@ -1214,14 +1230,19 @@ namespace rsx
found_desc.width != desc.width || found_desc.height != desc.height)
continue;
if (desc.update_cached)
update_image_contents(cmd, It->second.second, desc.external_handle, desc.width, desc.height);
return It->second.second;
}
image_view_type result = 0;
if (!desc.is_cubemap)
result = create_temporary_subresource_view(cmd, &desc.external_handle, desc.gcm_format, desc.x, desc.y, desc.width, desc.height);
else
if (desc.is_copy_cmd)
result = generate_atlas_from_images(cmd, desc.gcm_format, desc.width, desc.height, desc.sections_to_copy);
else if (desc.is_cubemap)
result = generate_cubemap_from_images(cmd, desc.gcm_format, desc.width, desc.external_cubemap_sources);
else
result = create_temporary_subresource_view(cmd, &desc.external_handle, desc.gcm_format, desc.x, desc.y, desc.width, desc.height);
if (result)
{
@ -1238,7 +1259,7 @@ namespace rsx
template <typename render_target_type, typename surface_store_type>
sampled_image_descriptor process_framebuffer_resource(commandbuffer_type& cmd, render_target_type texptr, u32 texaddr, u32 gcm_format, surface_store_type& m_rtts,
u16 tex_width, u16 tex_height, rsx::texture_dimension_extended extended_dimension, bool is_depth)
u16 tex_width, u16 tex_height, u16 tex_pitch, rsx::texture_dimension_extended extended_dimension, bool is_depth)
{
const u32 format = gcm_format & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN);
const auto surface_width = texptr->get_surface_width();
@ -1321,7 +1342,43 @@ namespace rsx
scale_y = 0.f;
}
if (internal_width > surface_width || internal_height > surface_height)
{
auto bpp = get_format_block_size_in_bytes(format);
auto overlapping = m_rtts.get_merged_texture_memory_region(texaddr, tex_width, tex_height, tex_pitch, bpp);
if (overlapping.size() > 1)
{
const auto w = rsx::apply_resolution_scale(internal_width, true);
const auto h = rsx::apply_resolution_scale(internal_height, true);
sampled_image_descriptor result = { texptr->get_surface(), texaddr, format, 0, 0, w, h,
texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y,
rsx::texture_dimension_extended::texture_dimension_2d };
result.external_subresource_desc.is_copy_cmd = true;
result.external_subresource_desc.sections_to_copy.reserve(overlapping.size());
for (auto &section : overlapping)
{
result.external_subresource_desc.sections_to_copy.push_back
({
section.surface->get_surface(),
rsx::apply_resolution_scale(section.src_x, true),
rsx::apply_resolution_scale(section.src_y, true),
rsx::apply_resolution_scale(section.dst_x, true),
rsx::apply_resolution_scale(section.dst_y, true),
rsx::apply_resolution_scale(section.width, true),
rsx::apply_resolution_scale(section.height, true)
});
}
return result;
}
}
bool requires_processing = surface_width > internal_width || surface_height > internal_height;
bool update_subresource_cache = false;
if (!requires_processing)
{
//NOTE: The scale also accounts for sampling outside the RTT region, e.g render to one quadrant but send whole texture for sampling
@ -1343,6 +1400,7 @@ namespace rsx
{
LOG_WARNING(RSX, "Attempting to sample a currently bound render target @ 0x%x", texaddr);
requires_processing = true;
update_subresource_cache = true;
break;
}
else
@ -1362,6 +1420,7 @@ namespace rsx
{
LOG_WARNING(RSX, "Attempting to sample a currently bound depth surface @ 0x%x", texaddr);
requires_processing = true;
update_subresource_cache = true;
}
else
{
@ -1376,8 +1435,12 @@ namespace rsx
{
const auto w = rsx::apply_resolution_scale(internal_width, true);
const auto h = rsx::apply_resolution_scale(internal_height, true);
return{ texptr->get_surface(), texaddr, format, 0, 0, w, h, texture_upload_context::framebuffer_storage,
sampled_image_descriptor result = { texptr->get_surface(), texaddr, format, 0, 0, w, h, texture_upload_context::framebuffer_storage,
is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d };
result.external_subresource_desc.update_cached = update_subresource_cache;
return result;
}
return{ texptr->get_view(), texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d };
@ -1400,8 +1463,9 @@ namespace rsx
const auto extended_dimension = tex.get_extended_texture_dimension();
u16 depth = 0;
u16 tex_height = (u16)tex.height();
u16 tex_pitch = tex.pitch();
const u16 tex_width = tex.width();
u16 tex_pitch = is_compressed_format? (u16)(get_texture_size(tex) / tex_height) : tex.pitch(); //NOTE: Compressed textures dont have a real pitch (tex_size = (w*h)/6)
if (tex_pitch == 0) tex_pitch = tex_width * get_format_block_size_in_bytes(format);
switch (extended_dimension)
{
@ -1428,7 +1492,7 @@ namespace rsx
{
if (test_framebuffer(texaddr + texptr->raster_address_offset))
{
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, extended_dimension, false);
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, tex_pitch, extended_dimension, false);
}
else
{
@ -1441,7 +1505,7 @@ namespace rsx
{
if (test_framebuffer(texaddr + texptr->raster_address_offset))
{
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, extended_dimension, true);
return process_framebuffer_resource(cmd, texptr, texaddr, tex.format(), m_rtts, tex_width, tex_height, tex_pitch, extended_dimension, true);
}
else
{
@ -1451,9 +1515,6 @@ namespace rsx
}
}
tex_pitch = is_compressed_format? (u16)(get_texture_size(tex) / tex_height) : tex_pitch; //NOTE: Compressed textures dont have a real pitch (tex_size = (w*h)/6)
if (tex_pitch == 0) tex_pitch = tex_width * get_format_block_size_in_bytes(format);
const bool unnormalized = (tex.format() & CELL_GCM_TEXTURE_UN) != 0;
f32 scale_x = (unnormalized) ? (1.f / tex_width) : 1.f;
f32 scale_y = (unnormalized) ? (1.f / tex_height) : 1.f;