rsx: Set up for multi-section inheritance

This commit is contained in:
kd-11 2019-07-13 17:52:55 +03:00 committed by kd-11
parent 397e5dd1ea
commit 009e01a347
8 changed files with 199 additions and 198 deletions

View file

@ -188,7 +188,7 @@ namespace rsx
template <bool is_depth_surface> template <bool is_depth_surface>
void intersect_surface_region(command_list_type cmd, u32 address, surface_type new_surface, surface_type prev_surface) void intersect_surface_region(command_list_type cmd, u32 address, surface_type new_surface, surface_type prev_surface)
{ {
auto scan_list = [&new_surface, address](const rsx::address_range& mem_range, u64 timestamp_check, auto scan_list = [&new_surface, address](const rsx::address_range& mem_range,
std::unordered_map<u32, surface_storage_type>& data) -> std::vector<std::pair<u32, surface_type>> std::unordered_map<u32, surface_storage_type>& data) -> std::vector<std::pair<u32, surface_type>>
{ {
std::vector<std::pair<u32, surface_type>> result; std::vector<std::pair<u32, surface_type>> result;
@ -196,7 +196,7 @@ namespace rsx
{ {
auto surface = Traits::get(e.second); auto surface = Traits::get(e.second);
if (e.second->last_use_tag <= timestamp_check || if (new_surface->last_use_tag > surface->last_use_tag ||
new_surface == surface || new_surface == surface ||
address == e.first || address == e.first ||
e.second->dirty()) e.second->dirty())
@ -235,10 +235,8 @@ namespace rsx
}; };
const rsx::address_range mem_range = new_surface->get_memory_range(); const rsx::address_range mem_range = new_surface->get_memory_range();
const u64 timestamp_check = prev_surface ? prev_surface->last_use_tag : new_surface->last_use_tag; auto list1 = scan_list(mem_range, m_render_targets_storage);
auto list2 = scan_list(mem_range, m_depth_stencil_storage);
auto list1 = scan_list(mem_range, timestamp_check, m_render_targets_storage);
auto list2 = scan_list(mem_range, timestamp_check, m_depth_stencil_storage);
if (prev_surface) if (prev_surface)
{ {
@ -279,10 +277,10 @@ namespace rsx
if (UNLIKELY(surface_info.size() > 1)) if (UNLIKELY(surface_info.size() > 1))
{ {
// Sort with newest first for early exit // Sort with oldest first for early exit
std::sort(surface_info.begin(), surface_info.end(), [](const auto& a, const auto& b) std::sort(surface_info.begin(), surface_info.end(), [](const auto& a, const auto& b)
{ {
return (a.second->last_use_tag > b.second->last_use_tag); return (a.second->last_use_tag < b.second->last_use_tag);
}); });
} }
@ -313,6 +311,12 @@ namespace rsx
continue; continue;
} }
if (child_w == size.width && child_h == size.height && surface_info.size() > 1)
{
// If the write covers the whole area, discard anything older
new_surface->clear_rw_barrier();
}
// TODO: Eventually need to stack all the overlapping regions, but for now just do the latest rect in the space // TODO: Eventually need to stack all the overlapping regions, but for now just do the latest rect in the space
deferred_clipped_region<surface_type> region; deferred_clipped_region<surface_type> region;
region.src_x = src_offset.x; region.src_x = src_offset.x;
@ -325,7 +329,6 @@ namespace rsx
region.target = new_surface; region.target = new_surface;
new_surface->set_old_contents_region(region, true); new_surface->set_old_contents_region(region, true);
break;
} }
} }
@ -367,12 +370,6 @@ namespace rsx
surface_storage_type &surface = It->second; surface_storage_type &surface = It->second;
const bool pitch_compatible = Traits::surface_is_pitch_compatible(surface, pitch); const bool pitch_compatible = Traits::surface_is_pitch_compatible(surface, pitch);
if (pitch_compatible)
{
// Preserve memory outside the area to be inherited if needed
split_surface_region<depth>(command_list, address, Traits::get(surface), (u16)width, (u16)height, bpp, antialias);
}
if (Traits::surface_matches_properties(surface, format, width, height, antialias)) if (Traits::surface_matches_properties(surface, format, width, height, antialias))
{ {
if (pitch_compatible) if (pitch_compatible)
@ -386,6 +383,12 @@ namespace rsx
} }
else else
{ {
if (pitch_compatible)
{
// Preserve memory outside the area to be inherited if needed
split_surface_region<depth>(command_list, address, Traits::get(surface), (u16)width, (u16)height, bpp, antialias);
}
old_surface = Traits::get(surface); old_surface = Traits::get(surface);
old_surface_storage = std::move(surface); old_surface_storage = std::move(surface);
primary_storage->erase(It); primary_storage->erase(It);
@ -457,12 +460,22 @@ namespace rsx
} }
} }
// Check if old_surface is 'new' and avoid intersection bool do_intersection_test = true;
// Check if old_surface is 'new' and hopefully avoid intersection
if (old_surface && old_surface->last_use_tag >= write_tag) if (old_surface && old_surface->last_use_tag >= write_tag)
{ {
const auto new_area = new_surface->get_normalized_memory_area();
const auto old_area = old_surface->get_normalized_memory_area();
if (new_area.x2 <= old_area.x2 && new_area.y2 <= old_area.y2)
{
do_intersection_test = false;
new_surface->set_old_contents(old_surface); new_surface->set_old_contents(old_surface);
} }
else }
if (do_intersection_test)
{ {
intersect_surface_region<depth>(command_list, address, new_surface, old_surface); intersect_surface_region<depth>(command_list, address, new_surface, old_surface);
} }

View file

@ -51,6 +51,9 @@ namespace rsx
template <typename surface_type> template <typename surface_type>
struct deferred_clipped_region struct deferred_clipped_region
{ {
// Chain
deferred_clipped_region<surface_type>* next_ptr = nullptr;
u16 src_x, src_y, dst_x, dst_y, width, height; u16 src_x, src_y, dst_x, dst_y, width, height;
f32 transfer_scale_x, transfer_scale_y; f32 transfer_scale_x, transfer_scale_y;
surface_type target; surface_type target;
@ -130,8 +133,7 @@ namespace rsx
u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to u64 last_use_tag = 0; // tag indicating when this block was last confirmed to have been written to
std::array<std::pair<u32, u64>, 5> memory_tag_samples; std::array<std::pair<u32, u64>, 5> memory_tag_samples;
// Obsolete, requires updating std::vector<deferred_clipped_region<image_storage_type>> old_contents;
deferred_clipped_region<image_storage_type> old_contents{};
// Surface properties // Surface properties
u16 rsx_pitch = 0; u16 rsx_pitch = 0;
@ -161,7 +163,7 @@ namespace rsx
virtual ~render_target_descriptor() virtual ~render_target_descriptor()
{ {
if (old_contents) if (!old_contents.empty())
{ {
// Cascade resource derefs // Cascade resource derefs
LOG_ERROR(RSX, "Resource was destroyed whilst holding a resource reference!"); LOG_ERROR(RSX, "Resource was destroyed whilst holding a resource reference!");
@ -284,7 +286,7 @@ namespace rsx
bool dirty() const bool dirty() const
{ {
return (state_flags != rsx::surface_state_flags::ready) || old_contents; return (state_flags != rsx::surface_state_flags::ready) || !old_contents.empty();
} }
bool test() const bool test() const
@ -311,45 +313,37 @@ namespace rsx
void clear_rw_barrier() void clear_rw_barrier()
{ {
release_ref(old_contents.source); for (auto &e : old_contents)
old_contents = {}; {
release_ref(e.source);
}
old_contents.clear();
} }
template<typename T> template<typename T>
void set_old_contents(T* other) void set_old_contents(T* other)
{ {
verify(HERE), !old_contents; verify(HERE), old_contents.empty();
if (!other || other->get_rsx_pitch() != this->get_rsx_pitch()) if (!other || other->get_rsx_pitch() != this->get_rsx_pitch())
{ {
old_contents = {};
return; return;
} }
old_contents = {}; old_contents.emplace_back();
old_contents.source = other; old_contents.back().source = other;
other->add_ref(); other->add_ref();
} }
template<typename T> template<typename T>
void set_old_contents_region(const T& region, bool normalized) void set_old_contents_region(const T& region, bool normalized)
{ {
if (old_contents)
{
// This can happen when doing memory splits
auto old_surface = static_cast<decltype(region.source)>(old_contents.source);
if (old_surface->last_use_tag > region.source->last_use_tag)
{
return;
}
clear_rw_barrier();
}
// NOTE: This method will not perform pitch verification! // NOTE: This method will not perform pitch verification!
verify(HERE), !old_contents, region.source, region.source != this; verify(HERE), region.source, region.source != this;
old_contents = region.template cast<image_storage_type>(); old_contents.push_back(region.template cast<image_storage_type>());
auto &slice = old_contents.back();
region.source->add_ref(); region.source->add_ref();
// Reverse normalization process if needed // Reverse normalization process if needed
@ -357,39 +351,39 @@ namespace rsx
{ {
const u16 bytes_to_texels_x = region.source->get_bpp() * region.source->samples_x; const u16 bytes_to_texels_x = region.source->get_bpp() * region.source->samples_x;
const u16 rows_to_texels_y = region.source->samples_y; const u16 rows_to_texels_y = region.source->samples_y;
old_contents.src_x /= bytes_to_texels_x; slice.src_x /= bytes_to_texels_x;
old_contents.src_y /= rows_to_texels_y; slice.src_y /= rows_to_texels_y;
old_contents.width /= bytes_to_texels_x; slice.width /= bytes_to_texels_x;
old_contents.height /= rows_to_texels_y; slice.height /= rows_to_texels_y;
const u16 bytes_to_texels_x2 = (get_bpp() * samples_x); const u16 bytes_to_texels_x2 = (get_bpp() * samples_x);
const u16 rows_to_texels_y2 = samples_y; const u16 rows_to_texels_y2 = samples_y;
old_contents.dst_x /= bytes_to_texels_x2; slice.dst_x /= bytes_to_texels_x2;
old_contents.dst_y /= rows_to_texels_y2; slice.dst_y /= rows_to_texels_y2;
old_contents.transfer_scale_x = f32(bytes_to_texels_x) / bytes_to_texels_x2; slice.transfer_scale_x = f32(bytes_to_texels_x) / bytes_to_texels_x2;
old_contents.transfer_scale_y = f32(rows_to_texels_y) / rows_to_texels_y2; slice.transfer_scale_y = f32(rows_to_texels_y) / rows_to_texels_y2;
} }
// Apply resolution scale if needed // Apply resolution scale if needed
if (g_cfg.video.resolution_scale_percent != 100) if (g_cfg.video.resolution_scale_percent != 100)
{ {
auto src_width = rsx::apply_resolution_scale(old_contents.width, true, old_contents.source->width()); auto src_width = rsx::apply_resolution_scale(slice.width, true, slice.source->width());
auto src_height = rsx::apply_resolution_scale(old_contents.height, true, old_contents.source->height()); auto src_height = rsx::apply_resolution_scale(slice.height, true, slice.source->height());
auto dst_width = rsx::apply_resolution_scale(old_contents.width, true, old_contents.target->width()); auto dst_width = rsx::apply_resolution_scale(slice.width, true, slice.target->width());
auto dst_height = rsx::apply_resolution_scale(old_contents.height, true, old_contents.target->height()); auto dst_height = rsx::apply_resolution_scale(slice.height, true, slice.target->height());
old_contents.transfer_scale_x *= f32(dst_width) / src_width; slice.transfer_scale_x *= f32(dst_width) / src_width;
old_contents.transfer_scale_y *= f32(dst_height) / src_height; slice.transfer_scale_y *= f32(dst_height) / src_height;
old_contents.width = src_width; slice.width = src_width;
old_contents.height = src_height; slice.height = src_height;
old_contents.src_x = rsx::apply_resolution_scale(old_contents.src_x, false, old_contents.source->width()); slice.src_x = rsx::apply_resolution_scale(slice.src_x, false, slice.source->width());
old_contents.src_y = rsx::apply_resolution_scale(old_contents.src_y, false, old_contents.source->height()); slice.src_y = rsx::apply_resolution_scale(slice.src_y, false, slice.source->height());
old_contents.dst_x = rsx::apply_resolution_scale(old_contents.dst_x, false, old_contents.target->width()); slice.dst_x = rsx::apply_resolution_scale(slice.dst_x, false, slice.target->width());
old_contents.dst_y = rsx::apply_resolution_scale(old_contents.dst_y, false, old_contents.target->height()); slice.dst_y = rsx::apply_resolution_scale(slice.dst_y, false, slice.target->height());
} }
} }
@ -457,7 +451,7 @@ namespace rsx
msaa_flags = resolve_flags; msaa_flags = resolve_flags;
} }
if (old_contents.source) if (!old_contents.empty())
{ {
clear_rw_barrier(); clear_rw_barrier();
} }

View file

@ -213,19 +213,18 @@ void GLGSRender::end()
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil); gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
// Handle special memory barrier for ARGB8->D24S8 in an active DSV // Handle special memory barrier for ARGB8->D24S8 in an active DSV
if (ds && ds->old_contents && if (ds && ds->old_contents.size() == 1 &&
ds->old_contents.source->get_internal_format() == gl::texture::internal_format::rgba8 && ds->old_contents[0].source->get_internal_format() == gl::texture::internal_format::rgba8)
rsx::pitch_compatible(ds, gl::as_rtt(ds->old_contents.source)))
{ {
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST); gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
// TODO: Stencil transfer // TODO: Stencil transfer
gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF); gl::g_hw_blitter->fast_clear_image(cmd, ds, 1.f, 0xFF);
ds->old_contents.init_transfer(ds); ds->old_contents[0].init_transfer(ds);
m_depth_converter.run(ds->old_contents.src_rect(), m_depth_converter.run(ds->old_contents[0].src_rect(),
ds->old_contents.dst_rect(), ds->old_contents[0].dst_rect(),
ds->old_contents.source, ds); ds->old_contents[0].source, ds);
ds->on_write(); ds->on_write();
} }

View file

@ -1,4 +1,4 @@
#include "stdafx.h" #include "stdafx.h"
#include "GLGSRender.h" #include "GLGSRender.h"
#include "Emu/System.h" #include "Emu/System.h"
@ -609,7 +609,7 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
state_flags &= ~rsx::surface_state_flags::erase_bkgnd; state_flags &= ~rsx::surface_state_flags::erase_bkgnd;
}; };
if (!old_contents) if (old_contents.empty())
{ {
// No memory to inherit // No memory to inherit
if (dirty() && (force_init || state_flags & rsx::surface_state_flags::erase_bkgnd)) if (dirty() && (force_init || state_flags & rsx::surface_state_flags::erase_bkgnd))
@ -623,15 +623,9 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
return; return;
} }
auto src_texture = gl::as_rtt(old_contents.source); for (auto &section : old_contents)
if (!rsx::pitch_compatible(this, src_texture))
{ {
LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory"); auto src_texture = gl::as_rtt(section.source);
clear_rw_barrier();
return;
}
const auto src_bpp = src_texture->get_bpp(); const auto src_bpp = src_texture->get_bpp();
const auto dst_bpp = get_bpp(); const auto dst_bpp = get_bpp();
rsx::typeless_xfer typeless_info{}; rsx::typeless_xfer typeless_info{};
@ -656,11 +650,11 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
} }
const bool dst_is_depth = !!(aspect() & gl::image_aspect::depth); const bool dst_is_depth = !!(aspect() & gl::image_aspect::depth);
old_contents.init_transfer(this); section.init_transfer(this);
if (state_flags & rsx::surface_state_flags::erase_bkgnd) if (state_flags & rsx::surface_state_flags::erase_bkgnd)
{ {
const auto area = old_contents.dst_rect(); const auto area = section.dst_rect();
if (area.x1 > 0 || area.y1 > 0 || unsigned(area.x2) < width() || unsigned(area.y2) < height()) if (area.x1 > 0 || area.y1 > 0 || unsigned(area.x2) < width() || unsigned(area.y2) < height())
{ {
clear_surface_impl(); clear_surface_impl();
@ -671,10 +665,11 @@ void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init
} }
} }
gl::g_hw_blitter->scale_image(cmd, old_contents.source, this, gl::g_hw_blitter->scale_image(cmd, section.source, this,
old_contents.src_rect(), section.src_rect(),
old_contents.dst_rect(), section.dst_rect(),
!dst_is_depth, dst_is_depth, typeless_info); !dst_is_depth, dst_is_depth, typeless_info);
}
// Memory has been transferred, discard old contents and update memory flags // Memory has been transferred, discard old contents and update memory flags
// TODO: Preserve memory outside surface clip region // TODO: Preserve memory outside surface clip region

View file

@ -269,7 +269,7 @@ struct gl_render_target_traits
static static
void notify_surface_invalidated(const std::unique_ptr<gl::render_target>& surface) void notify_surface_invalidated(const std::unique_ptr<gl::render_target>& surface)
{ {
if (surface->old_contents) if (!surface->old_contents.empty())
{ {
// TODO: Retire the deferred writes // TODO: Retire the deferred writes
surface->clear_rw_barrier(); surface->clear_rw_barrier();

View file

@ -1212,8 +1212,8 @@ void VKGSRender::end()
// Check for data casts // Check for data casts
// NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better // NOTE: This is deprecated and will be removed soon. The memory barrier invoked before rendering does this better
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
if (ds && ds->old_contents && if (ds && ds->old_contents.size() == 1 &&
ds->old_contents.source->info.format == VK_FORMAT_B8G8R8A8_UNORM) ds->old_contents[0].source->info.format == VK_FORMAT_B8G8R8A8_UNORM)
{ {
auto key = vk::get_renderpass_key(ds->info.format); auto key = vk::get_renderpass_key(ds->info.format);
auto render_pass = vk::get_renderpass(*m_device, key); auto render_pass = vk::get_renderpass(*m_device, key);
@ -1223,7 +1223,7 @@ void VKGSRender::end()
VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 }; VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
// Initialize source // Initialize source
auto src = vk::as_rtt(ds->old_contents.source); auto src = vk::as_rtt(ds->old_contents[0].source);
src->read_barrier(*m_current_command_buffer); src->read_barrier(*m_current_command_buffer);
switch (src->current_layout) switch (src->current_layout)
@ -1244,10 +1244,10 @@ void VKGSRender::end()
if (!preinitialized) ds->pop_layout(*m_current_command_buffer); if (!preinitialized) ds->pop_layout(*m_current_command_buffer);
// TODO: Stencil transfer // TODO: Stencil transfer
ds->old_contents.init_transfer(ds); ds->old_contents[0].init_transfer(ds);
m_depth_converter->run(*m_current_command_buffer, m_depth_converter->run(*m_current_command_buffer,
ds->old_contents.src_rect(), ds->old_contents[0].src_rect(),
ds->old_contents.dst_rect(), ds->old_contents[0].dst_rect(),
src->get_view(0xAAE4, rsx::default_remap_vector), src->get_view(0xAAE4, rsx::default_remap_vector),
ds, render_pass); ds, render_pass);

View file

@ -281,13 +281,7 @@ namespace vk
get_resolve_target(); get_resolve_target();
} }
if (old_contents && !rsx::pitch_compatible(this, static_cast<vk::render_target*>(old_contents.source))) if (LIKELY(old_contents.empty()))
{
LOG_TRACE(RSX, "Pitch mismatch, could not transfer inherited memory");
clear_rw_barrier();
}
if (LIKELY(!old_contents))
{ {
if (state_flags & rsx::surface_state_flags::erase_bkgnd) if (state_flags & rsx::surface_state_flags::erase_bkgnd)
{ {
@ -321,7 +315,10 @@ namespace vk
return; return;
} }
auto src_texture = static_cast<vk::render_target*>(old_contents.source); bool optimize_copy = true;
for (auto &section : old_contents)
{
auto src_texture = static_cast<vk::render_target*>(section.source);
src_texture->read_barrier(cmd); src_texture->read_barrier(cmd);
const auto src_bpp = src_texture->get_bpp(); const auto src_bpp = src_texture->get_bpp();
@ -346,10 +343,10 @@ namespace vk
} }
vk::blitter hw_blitter; vk::blitter hw_blitter;
old_contents.init_transfer(this); section.init_transfer(this);
auto src_area = old_contents.src_rect(); auto src_area = section.src_rect();
auto dst_area = old_contents.dst_rect(); auto dst_area = section.dst_rect();
if (g_cfg.video.antialiasing_level != msaa_level::none) if (g_cfg.video.antialiasing_level != msaa_level::none)
{ {
@ -390,7 +387,10 @@ namespace vk
dst_area, dst_area,
/*linear?*/false, /*depth?(unused)*/false, typeless_info); /*linear?*/false, /*depth?(unused)*/false, typeless_info);
on_write_copy(0, !memory_load); optimize_copy = optimize_copy && !memory_load;
}
on_write_copy(0, optimize_copy);
if (!read_access && samples() > 1) if (!read_access && samples() > 1)
{ {
@ -639,7 +639,7 @@ namespace rsx
surface->frame_tag = vk::get_current_frame_id(); surface->frame_tag = vk::get_current_frame_id();
if (!surface->frame_tag) surface->frame_tag = 1; if (!surface->frame_tag) surface->frame_tag = 1;
if (surface->old_contents) if (!surface->old_contents.empty())
{ {
// TODO: Retire the deferred writes // TODO: Retire the deferred writes
surface->clear_rw_barrier(); surface->clear_rw_barrier();

View file

@ -1,4 +1,4 @@
#pragma once #pragma once
#include "../System.h" #include "../System.h"
#include "Utilities/address_range.h" #include "Utilities/address_range.h"
@ -606,7 +606,7 @@ namespace rsx
template <typename SurfaceType> template <typename SurfaceType>
std::tuple<u16, u16, u16, u16> get_transferable_region(const SurfaceType* surface) std::tuple<u16, u16, u16, u16> get_transferable_region(const SurfaceType* surface)
{ {
auto src = static_cast<const SurfaceType*>(surface->old_contents.source); auto src = static_cast<const SurfaceType*>(surface->old_contents[0].source);
auto area1 = src->get_normalized_memory_area(); auto area1 = src->get_normalized_memory_area();
auto area2 = surface->get_normalized_memory_area(); auto area2 = surface->get_normalized_memory_area();