rsx: Handle transfer_read differently from transfer_write

- Transfer writes are expected to clobber surface cache contents. Do NOT reload from CPU memory for writes.
- TODO: During transfer write to surface cache objects, lock memory if it was unlocked to avoid silly problems.
This commit is contained in:
kd-11 2021-05-08 19:08:32 +03:00 committed by kd-11
parent b085284c55
commit e3944bc67f
12 changed files with 106 additions and 38 deletions

View file

@ -36,11 +36,59 @@ namespace rsx
bytes = 2 bytes = 2
}; };
// Access descriptor for a surface. This is simply a modified enum class: the
// public enumerators are used like plain enum values, while the wrapper stores
// the raw u32 and adds classification helpers (is_read/is_write/is_transfer).
class surface_access
{
public:
	// Publicly visible enumerators
	enum
	{
		shader_read = 0,
		shader_write = 1,
		transfer_read = 2,
		transfer_write = 4,
	};

private:
	// Meta masks used by the classification helpers
	enum
	{
		all_writes = (shader_write | transfer_write),
		all_reads = (shader_read | transfer_read),
		all_transfer = (transfer_read | transfer_write)
	};

	u32 value_;

	// True when value_ has no bits set outside of 'mask'
	bool within(u32 mask) const
	{
		return !(value_ & ~mask);
	}

public:
	// Ctor. Left implicit on purpose so enumerators and raw flag values convert transparently.
	surface_access(u32 value) : value_(value)
	{}

	// Quick helpers

	// True for shader_read and transfer_read.
	// shader_read is encoded as 0, so the zero value counts as a read.
	bool is_read() const
	{
		return within(all_reads);
	}

	// True for shader_write, transfer_write or a combination of both.
	// The zero value is shader_read and trivially passes the mask test,
	// so it must be rejected explicitly or every shader read would also be a write.
	bool is_write() const
	{
		return value_ != 0 && within(all_writes);
	}

	// True for transfer_read, transfer_write or a combination of both.
	// The zero value (shader_read) is rejected explicitly for the same reason as in is_write().
	bool is_transfer() const
	{
		return value_ != 0 && within(all_transfer);
	}

	// Compares the stored raw values
	bool operator == (const surface_access& other) const
	{
		return value_ == other.value_;
	}

	// Compares the stored raw value against an enumerator or raw flag value
	bool operator == (u32 other) const
	{
		return value_ == other;
	}
};
// Defines how the underlying PS3-visible memory backed by a texture is accessed // Defines how the underlying PS3-visible memory backed by a texture is accessed

View file

@ -837,8 +837,11 @@ namespace rsx
continue; continue;
auto surface = tex_info.second.get(); auto surface = tex_info.second.get();
if (access == rsx::surface_access::transfer && surface->write_through()) if (access.is_transfer() && access.is_read() && surface->write_through())
{
// The surface has no data other than what can be loaded from CPU
continue; continue;
}
if (!rsx::pitch_compatible(surface, required_pitch, required_height)) if (!rsx::pitch_compatible(surface, required_pitch, required_height))
continue; continue;
@ -1128,7 +1131,7 @@ namespace rsx
if (surface->dirty()) if (surface->dirty())
{ {
// Force memory barrier to release some resources // Force memory barrier to release some resources
surface->memory_barrier(cmd, rsx::surface_access::read); surface->memory_barrier(cmd, rsx::surface_access::shader_read);
} }
else if (!surface->test()) else if (!surface->test())
{ {

View file

@ -623,7 +623,7 @@ namespace rsx
if (spp == 1 || sample_layout == rsx::surface_sample_layout::ps3) if (spp == 1 || sample_layout == rsx::surface_sample_layout::ps3)
return; return;
ensure(access_type != rsx::surface_access::write); ensure(access_type.is_read() || access_type.is_transfer());
transform_samples_to_pixels(region); transform_samples_to_pixels(region);
} }
}; };

View file

@ -1630,7 +1630,7 @@ namespace rsx
if (options.prefer_surface_cache) if (options.prefer_surface_cache)
{ {
const u16 block_h = (attr.depth * attr.slice_h); const u16 block_h = (attr.depth * attr.slice_h);
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::read); overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
if (!overlapping_fbos.empty()) if (!overlapping_fbos.empty())
{ {
@ -1695,7 +1695,7 @@ namespace rsx
{ {
// Now check for surface cache hits // Now check for surface cache hits
const u16 block_h = (attr.depth * attr.slice_h); const u16 block_h = (attr.depth * attr.slice_h);
overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::read); overlapping_fbos = m_rtts.get_merged_texture_memory_region(cmd, attr.address, attr.width, block_h, attr.pitch, attr.bpp, rsx::surface_access::shader_read);
} }
if (!overlapping_fbos.empty() || !overlapping_locals.empty()) if (!overlapping_fbos.empty() || !overlapping_locals.empty())
@ -2171,9 +2171,9 @@ namespace rsx
src_address += (src.width - src_w) * src_bpp; src_address += (src.width - src_w) * src_bpp;
} }
auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, bool allow_clipped) -> typename surface_store_type::surface_overlap_info auto rtt_lookup = [&m_rtts, &cmd, &scale_x, &scale_y, this](u32 address, u32 width, u32 height, u32 pitch, u8 bpp, rsx::flags32_t access, bool allow_clipped) -> typename surface_store_type::surface_overlap_info
{ {
const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, rsx::surface_access::transfer); const auto list = m_rtts.get_merged_texture_memory_region(cmd, address, width, height, pitch, bpp, access);
if (list.empty()) if (list.empty())
{ {
return {}; return {};
@ -2256,11 +2256,18 @@ namespace rsx
// Check if src/dst are parts of render targets // Check if src/dst are parts of render targets
typename surface_store_type::surface_overlap_info dst_subres; typename surface_store_type::surface_overlap_info dst_subres;
bool use_null_region = false; bool use_null_region = false;
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
// NOTE: Grab the src first as requirements for reading are more strict than requirements for writing
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, surface_access::transfer_read, false);
src_is_render_target = src_subres.surface != nullptr;
if (get_location(dst_address) == CELL_GCM_LOCATION_LOCAL) if (get_location(dst_address) == CELL_GCM_LOCATION_LOCAL)
{ {
// TODO: HACK // TODO: HACK
// After writing, it is required to lock the memory range from access! // After writing, it is required to lock the memory range from access!
dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, false); dst_subres = rtt_lookup(dst_address, dst_w, dst_h, dst.pitch, dst_bpp, surface_access::transfer_write, false);
dst_is_render_target = dst_subres.surface != nullptr; dst_is_render_target = dst_subres.surface != nullptr;
} }
else else
@ -2272,10 +2279,6 @@ namespace rsx
m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch* dst_h)); m_rtts.invalidate_range(utils::address_range::start_length(dst_address, dst.pitch* dst_h));
} }
// TODO: Handle cases where src or dst can be a depth texture while the other is a color texture - requires a render pass to emulate
auto src_subres = rtt_lookup(src_address, src_w, src_h, src.pitch, src_bpp, false);
src_is_render_target = src_subres.surface != nullptr;
if (src_is_render_target) if (src_is_render_target)
{ {
const auto surf = src_subres.surface; const auto surf = src_subres.surface;
@ -2543,7 +2546,7 @@ namespace rsx
// Destination dimensions are relaxed (true) // Destination dimensions are relaxed (true)
dst_area = dst_subres.src_area; dst_area = dst_subres.src_area;
dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer); dest_texture = dst_subres.surface->get_surface(rsx::surface_access::transfer_write);
typeless_info.dst_context = texture_upload_context::framebuffer_storage; typeless_info.dst_context = texture_upload_context::framebuffer_storage;
dst_is_depth_surface = typeless_info.dst_is_typeless ? false : dst_subres.is_depth; dst_is_depth_surface = typeless_info.dst_is_typeless ? false : dst_subres.is_depth;
@ -2692,7 +2695,7 @@ namespace rsx
else else
{ {
src_area = src_subres.src_area; src_area = src_subres.src_area;
vram_texture = src_subres.surface->get_surface(rsx::surface_access::read); vram_texture = src_subres.surface->get_surface(rsx::surface_access::transfer_read);
typeless_info.src_context = texture_upload_context::framebuffer_storage; typeless_info.src_context = texture_upload_context::framebuffer_storage;
} }
@ -2879,7 +2882,7 @@ namespace rsx
std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale<true>(src_area.x2, src_area.y2, surface_width, surface_height); std::tie(src_area.x2, src_area.y2) = rsx::apply_resolution_scale<true>(src_area.x2, src_area.y2, surface_width, surface_height);
// The resource is of surface type; possibly disabled AA emulation // The resource is of surface type; possibly disabled AA emulation
src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, src_area); src_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_read, src_area);
} }
if (dst_is_render_target) if (dst_is_render_target)
@ -2890,7 +2893,7 @@ namespace rsx
std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale<true>(dst_area.x2, dst_area.y2, surface_width, surface_height); std::tie(dst_area.x2, dst_area.y2) = rsx::apply_resolution_scale<true>(dst_area.x2, dst_area.y2, surface_width, surface_height);
// The resource is of surface type; possibly disabled AA emulation // The resource is of surface type; possibly disabled AA emulation
dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer, dst_area); dst_subres.surface->transform_blit_coordinates(rsx::surface_access::transfer_write, dst_area);
} }
if (helpers::is_gcm_depth_format(typeless_info.src_gcm_format) != if (helpers::is_gcm_depth_format(typeless_info.src_gcm_format) !=

View file

@ -317,7 +317,7 @@ namespace rsx
out.push_back out.push_back
({ ({
section.surface->get_surface(rsx::surface_access::read), section.surface->get_surface(rsx::surface_access::shader_read),
surface_transform::identity, surface_transform::identity,
0, 0,
static_cast<u16>(src_x), static_cast<u16>(src_x),
@ -558,7 +558,7 @@ namespace rsx
const auto format_class = (force_convert) ? classify_format(attr2.gcm_format) : texptr->format_class(); const auto format_class = (force_convert) ? classify_format(attr2.gcm_format) : texptr->format_class();
const auto command = surface_is_rop_target ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; const auto command = surface_is_rop_target ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static;
return { texptr->get_surface(rsx::surface_access::read), command, attr2, {}, return { texptr->get_surface(rsx::surface_access::shader_read), command, attr2, {},
texture_upload_context::framebuffer_storage, format_class, scale, texture_upload_context::framebuffer_storage, format_class, scale,
extended_dimension, decoded_remap }; extended_dimension, decoded_remap };
} }
@ -569,7 +569,7 @@ namespace rsx
if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d) if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_3d)
{ {
return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::_3d_unwrap, return{ texptr->get_surface(rsx::surface_access::shader_read), deferred_request_command::_3d_unwrap,
attr2, {}, attr2, {},
texture_upload_context::framebuffer_storage, texptr->format_class(), scale, texture_upload_context::framebuffer_storage, texptr->format_class(), scale,
rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap };
@ -577,7 +577,7 @@ namespace rsx
ensure(extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap); ensure(extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap);
return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::cubemap_unwrap, return{ texptr->get_surface(rsx::surface_access::shader_read), deferred_request_command::cubemap_unwrap,
attr2, {}, attr2, {},
texture_upload_context::framebuffer_storage, texptr->format_class(), scale, texture_upload_context::framebuffer_storage, texptr->format_class(), scale,
rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap };

View file

@ -12,7 +12,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons
gl::command_context cmd = { gl_state }; gl::command_context cmd = { gl_state };
const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format); const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd,
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read); info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::shader_read);
if (!overlap_info.empty()) if (!overlap_info.empty())
{ {
@ -46,7 +46,7 @@ gl::texture* GLGSRender::get_present_source(gl::present_surface_info* info, cons
if (viable) if (viable)
{ {
surface->read_barrier(cmd); surface->read_barrier(cmd);
image = section.surface->get_surface(rsx::surface_access::read); image = section.surface->get_surface(rsx::surface_access::shader_read);
std::tie(info->width, info->height) = rsx::apply_resolution_scale<true>( std::tie(info->width, info->height) = rsx::apply_resolution_scale<true>(
std::min(surface_width, static_cast<u16>(info->width)), std::min(surface_width, static_cast<u16>(info->width)),

View file

@ -456,7 +456,21 @@ void gl::render_target::initialize_memory(gl::command_context& cmd, bool /*read_
void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_access access) void gl::render_target::memory_barrier(gl::command_context& cmd, rsx::surface_access access)
{ {
const bool read_access = (access != rsx::surface_access::write); const bool read_access = access.is_read();
const bool is_depth = is_depth_surface();
const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers;
if (should_read_buffers)
{
// TODO: Decide what to do when memory loads are disabled but the underlying memory has changed
// NOTE: Assume test() is expensive when in a pinch
if (last_use_tag && state_flags == rsx::surface_state_flags::ready && !test())
{
// TODO: Figure out why merely returning and failing the test does not work when reading (TLoU)
// The result should have been the same either way
state_flags |= rsx::surface_state_flags::erase_bkgnd;
}
}
if (old_contents.empty()) if (old_contents.empty())
{ {

View file

@ -111,8 +111,8 @@ namespace gl
} }
void memory_barrier(gl::command_context& cmd, rsx::surface_access access); void memory_barrier(gl::command_context& cmd, rsx::surface_access access);
// Convenience wrapper: forwards to memory_barrier() with shader_read access.
void read_barrier(gl::command_context& cmd)
{
	memory_barrier(cmd, rsx::surface_access::shader_read);
}
// Convenience wrapper: forwards to memory_barrier() with shader_write access.
void write_barrier(gl::command_context& cmd)
{
	memory_barrier(cmd, rsx::surface_access::shader_write);
}
}; };
struct framebuffer_holder : public gl::fbo, public rsx::ref_counted struct framebuffer_holder : public gl::fbo, public rsx::ref_counted

View file

@ -281,7 +281,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
// Check the surface store first // Check the surface store first
const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format); const auto format_bpp = rsx::get_format_block_size_in_bytes(info->format);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer,
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read); info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::shader_read);
if (!overlap_info.empty()) if (!overlap_info.empty())
{ {
@ -315,7 +315,7 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
if (viable) if (viable)
{ {
surface->read_barrier(*m_current_command_buffer); surface->read_barrier(*m_current_command_buffer);
image_to_flip = section.surface->get_surface(rsx::surface_access::read); image_to_flip = section.surface->get_surface(rsx::surface_access::shader_read);
std::tie(info->width, info->height) = rsx::apply_resolution_scale<true>( std::tie(info->width, info->height) = rsx::apply_resolution_scale<true>(
std::min(surface_width, static_cast<u16>(info->width)), std::min(surface_width, static_cast<u16>(info->width)),

View file

@ -304,7 +304,7 @@ namespace vk
vk::viewable_image* render_target::get_surface(rsx::surface_access access_type) vk::viewable_image* render_target::get_surface(rsx::surface_access access_type)
{ {
if (samples() == 1 || access_type == rsx::surface_access::write) if (samples() == 1 || access_type == rsx::surface_access::shader_write)
{ {
return this; return this;
} }
@ -369,7 +369,7 @@ namespace vk
void render_target::memory_barrier(vk::command_buffer& cmd, rsx::surface_access access) void render_target::memory_barrier(vk::command_buffer& cmd, rsx::surface_access access)
{ {
const bool read_access = (access != rsx::surface_access::write); const bool read_access = access.is_read();
const bool is_depth = is_depth_surface(); const bool is_depth = is_depth_surface();
const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers; const bool should_read_buffers = is_depth ? !!g_cfg.video.read_depth_buffer : !!g_cfg.video.read_color_buffers;
@ -533,8 +533,8 @@ namespace vk
hw_blitter.scale_image( hw_blitter.scale_image(
cmd, cmd,
src_texture->get_surface(rsx::surface_access::read), src_texture->get_surface(rsx::surface_access::transfer_read),
this->get_surface(rsx::surface_access::transfer), this->get_surface(rsx::surface_access::transfer_write),
src_area, src_area,
dst_area, dst_area,
/*linear?*/false, typeless_info); /*linear?*/false, typeless_info);

View file

@ -53,8 +53,8 @@ namespace vk
// Synchronization // Synchronization
void texture_barrier(vk::command_buffer& cmd); void texture_barrier(vk::command_buffer& cmd);
void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access); void memory_barrier(vk::command_buffer& cmd, rsx::surface_access access);
// Convenience wrapper: forwards to memory_barrier() with shader_read access.
void read_barrier(vk::command_buffer& cmd)
{
	memory_barrier(cmd, rsx::surface_access::shader_read);
}
// Convenience wrapper: forwards to memory_barrier() with shader_write access.
void write_barrier(vk::command_buffer& cmd)
{
	memory_barrier(cmd, rsx::surface_access::shader_write);
}
}; };
static inline vk::render_target* as_rtt(vk::image* t) static inline vk::render_target* as_rtt(vk::image* t)

View file

@ -197,7 +197,7 @@ namespace vk
{ {
auto surface = vk::as_rtt(vram_texture); auto surface = vk::as_rtt(vram_texture);
surface->read_barrier(cmd); surface->read_barrier(cmd);
locked_resource = surface->get_surface(rsx::surface_access::read); locked_resource = surface->get_surface(rsx::surface_access::shader_read);
transfer_width *= surface->samples_x; transfer_width *= surface->samples_x;
transfer_height *= surface->samples_y; transfer_height *= surface->samples_y;
} }