mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-12 01:38:37 +12:00
rsx: Attempt to reduce stencil load overhead for nvidia cards
This commit is contained in:
parent
ca82dd7200
commit
8a1cf2c913
5 changed files with 75 additions and 9 deletions
|
@ -148,6 +148,7 @@ namespace rsx
|
|||
flags32_t memory_usage_flags = surface_usage_flags::unknown;
|
||||
flags32_t state_flags = surface_state_flags::ready;
|
||||
flags32_t msaa_flags = surface_state_flags::ready;
|
||||
flags32_t stencil_init_flags = 0;
|
||||
|
||||
union
|
||||
{
|
||||
|
@ -462,9 +463,15 @@ namespace rsx
|
|||
}
|
||||
}
|
||||
|
||||
void on_write_copy(u64 write_tag = 0)
|
||||
// Called after this surface was written to via a copy operation.
// write_tag          - forwarded unchanged to on_write().
// keep_optimizations - when true, stencil_init_flags is left untouched.
void on_write_copy(u64 write_tag = 0, bool keep_optimizations = false)
{
	on_write(write_tag, rsx::surface_state_flags::require_unresolve);

	// Nothing more to do if the caller asked to keep the stencil
	// optimizations, or if this is not a depth surface.
	if (keep_optimizations || !is_depth_surface())
	{
		return;
	}

	// A successful write-copy occurred, cannot guarantee flat contents in stencil area
	stencil_init_flags |= (1 << 9);
}
|
||||
|
||||
// Returns the rect area occupied by this surface expressed as an 8bpp image with no AA
|
||||
|
|
|
@ -261,6 +261,7 @@ struct gl_render_target_traits
|
|||
surface->set_rsx_pitch((u16)pitch);
|
||||
surface->queue_tag(address);
|
||||
surface->last_use_tag = 0;
|
||||
surface->stencil_init_flags = 0;
|
||||
surface->memory_usage_flags = rsx::surface_usage_flags::unknown;
|
||||
}
|
||||
|
||||
|
|
|
@ -1882,7 +1882,7 @@ void VKGSRender::clear_surface(u32 mask)
|
|||
|
||||
//clip region
|
||||
std::tie(scissor_x, scissor_y, scissor_w, scissor_h) = rsx::clip_region<u16>(fb_width, fb_height, scissor_x, scissor_y, scissor_w, scissor_h, true);
|
||||
VkClearRect region = { { { scissor_x, scissor_y },{ scissor_w, scissor_h } }, 0, 1 };
|
||||
VkClearRect region = { { { scissor_x, scissor_y }, { scissor_w, scissor_h } }, 0, 1 };
|
||||
|
||||
const bool require_mem_load = (scissor_w * scissor_h) < (fb_width * fb_height);
|
||||
auto surface_depth_format = rsx::method_registers.surface_depth_fmt();
|
||||
|
@ -1910,6 +1910,12 @@ void VKGSRender::clear_surface(u32 mask)
|
|||
depth_stencil_clear_values.depthStencil.stencil = clear_stencil;
|
||||
|
||||
depth_stencil_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
|
||||
if (ds->samples() > 1)
|
||||
{
|
||||
if (!require_mem_load) ds->stencil_init_flags &= 0xFF;
|
||||
ds->stencil_init_flags |= clear_stencil;
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask & 0x3) != 0x3 && !require_mem_load && ds->state_flags & rsx::surface_state_flags::erase_bkgnd)
|
||||
|
@ -2470,6 +2476,21 @@ bool VKGSRender::load_program()
|
|||
vk::get_compare_func(rsx::method_registers.back_stencil_func()),
|
||||
0xFF, 0xFF); //write mask, func_mask, ref are dynamic
|
||||
}
|
||||
|
||||
if (auto ds = m_rtts.m_bound_depth_stencil.second;
|
||||
ds && ds->samples() > 1 && !(ds->stencil_init_flags & 0xFF00))
|
||||
{
|
||||
if (properties.state.ds.front.failOp != VK_STENCIL_OP_KEEP ||
|
||||
properties.state.ds.front.depthFailOp != VK_STENCIL_OP_KEEP ||
|
||||
properties.state.ds.front.passOp != VK_STENCIL_OP_KEEP ||
|
||||
properties.state.ds.front.failOp != VK_STENCIL_OP_KEEP ||
|
||||
properties.state.ds.front.depthFailOp != VK_STENCIL_OP_KEEP ||
|
||||
properties.state.ds.front.passOp != VK_STENCIL_OP_KEEP)
|
||||
{
|
||||
// Toggle bit 9 to signal require full bit-wise transfer
|
||||
ds->stencil_init_flags |= (1 << 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto rasterization_samples = u8((m_current_renderpass_key >> 16) & 0xF);
|
||||
|
|
|
@ -359,12 +359,16 @@ namespace vk
|
|||
}
|
||||
|
||||
vk::image *target_image = (samples() > 1) ? get_resolve_target() : this;
|
||||
bool memory_load = true;
|
||||
if (dst_area.x1 == 0 && dst_area.y1 == 0 &&
|
||||
unsigned(dst_area.x2) == target_image->width() && unsigned(dst_area.y2) == target_image->height())
|
||||
{
|
||||
// Skip a bunch of useless work
|
||||
state_flags &= ~(rsx::surface_state_flags::erase_bkgnd);
|
||||
msaa_flags = rsx::surface_state_flags::ready;
|
||||
|
||||
memory_load = false;
|
||||
stencil_init_flags = src_texture->stencil_init_flags;
|
||||
}
|
||||
else if (state_flags & rsx::surface_state_flags::erase_bkgnd)
|
||||
{
|
||||
|
@ -387,7 +391,7 @@ namespace vk
|
|||
dst_area,
|
||||
/*linear?*/false, /*depth?(unused)*/false, typeless_info);
|
||||
|
||||
on_write_copy();
|
||||
on_write_copy(0, !memory_load);
|
||||
|
||||
if (!read_access && samples() > 1)
|
||||
{
|
||||
|
@ -570,6 +574,7 @@ namespace rsx
|
|||
sink->state_flags = rsx::surface_state_flags::erase_bkgnd;
|
||||
sink->native_component_map = ref->native_component_map;
|
||||
sink->sample_layout = ref->sample_layout;
|
||||
sink->stencil_init_flags = ref->stencil_init_flags;
|
||||
sink->native_pitch = u16(prev.width * ref->get_bpp() * ref->samples_x);
|
||||
sink->surface_width = prev.width;
|
||||
sink->surface_height = prev.height;
|
||||
|
@ -631,6 +636,7 @@ namespace rsx
|
|||
surface->rsx_pitch = (u16)pitch;
|
||||
surface->queue_tag(address);
|
||||
surface->last_use_tag = 0;
|
||||
surface->stencil_init_flags = 0;
|
||||
surface->memory_usage_flags = rsx::surface_usage_flags::unknown;
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
#include "VKResolveHelper.h"
|
||||
#include "VKRenderPass.h"
|
||||
#include "VKRenderTargets.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
|
@ -83,10 +84,26 @@ namespace vk
|
|||
else
|
||||
{
|
||||
initialize_pass(g_depth_resolver, dev);
|
||||
initialize_pass(g_stencil_resolver, dev);
|
||||
|
||||
g_depth_resolver->run(cmd, src, dst, renderpass);
|
||||
g_stencil_resolver->run(cmd, src, dst, renderpass);
|
||||
|
||||
// Chance for optimization here: If the stencil buffer was not used, simply perform a clear operation
|
||||
const auto stencil_init_flags = vk::as_rtt(src)->stencil_init_flags;
|
||||
if (stencil_init_flags & 0xFF00)
|
||||
{
|
||||
initialize_pass(g_stencil_resolver, dev);
|
||||
g_stencil_resolver->run(cmd, src, dst, renderpass);
|
||||
}
|
||||
else
|
||||
{
|
||||
VkClearDepthStencilValue clear{ 1.f, stencil_init_flags & 0xFF };
|
||||
VkImageSubresourceRange range{ VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
|
||||
|
||||
dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
vkCmdClearDepthStencilImage(cmd, dst->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range);
|
||||
dst->pop_layout(cmd);
|
||||
}
|
||||
|
||||
vk::as_rtt(dst)->stencil_init_flags = stencil_init_flags;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -128,10 +145,24 @@ namespace vk
|
|||
else
|
||||
{
|
||||
initialize_pass(g_depth_unresolver, dev);
|
||||
initialize_pass(g_stencil_unresolver, dev);
|
||||
|
||||
g_depth_unresolver->run(cmd, dst, src, renderpass);
|
||||
g_stencil_unresolver->run(cmd, dst, src, renderpass);
|
||||
|
||||
// Chance for optimization here: If the stencil buffer was not used, simply perform a clear operation
|
||||
const auto stencil_init_flags = vk::as_rtt(dst)->stencil_init_flags;
|
||||
if (stencil_init_flags & 0xFF00)
|
||||
{
|
||||
initialize_pass(g_stencil_unresolver, dev);
|
||||
g_stencil_unresolver->run(cmd, dst, src, renderpass);
|
||||
}
|
||||
else
|
||||
{
|
||||
VkClearDepthStencilValue clear{ 1.f, stencil_init_flags & 0xFF };
|
||||
VkImageSubresourceRange range{ VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 };
|
||||
|
||||
dst->push_layout(cmd, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
vkCmdClearDepthStencilImage(cmd, dst->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range);
|
||||
dst->pop_layout(cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue