mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 06:21:26 +12:00
Strict Rendering Mode (SRM) fix. Move old surface copy before texture upload.
Fixes the following issues on Tales of Vesperia which requires SRM. - Blacked out scene after the sleeping dog now renders correctly - Ghosting effect. The ghosting was most noticeable as a delay between the character rendering and the cell shading around the character. This appears to be gone with this change.
This commit is contained in:
parent
70cc2e3665
commit
a17025c465
2 changed files with 183 additions and 181 deletions
|
@ -205,6 +205,113 @@ void GLGSRender::end()
|
||||||
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
|
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
|
||||||
auto upload_info = set_vertex_buffer();
|
auto upload_info = set_vertex_buffer();
|
||||||
|
|
||||||
|
//Check if depth buffer is bound and valid
|
||||||
|
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
|
||||||
|
auto copy_rtt_contents = [](gl::render_target *surface)
|
||||||
|
{
|
||||||
|
if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format())
|
||||||
|
{
|
||||||
|
//Copy data from old contents onto this one
|
||||||
|
//1. Clip a rectangular region defning the data
|
||||||
|
//2. Perform a GPU blit
|
||||||
|
u16 parent_w = surface->old_contents->width();
|
||||||
|
u16 parent_h = surface->old_contents->height();
|
||||||
|
u16 copy_w, copy_h;
|
||||||
|
|
||||||
|
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
|
||||||
|
glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
|
||||||
|
surface->set_cleared();
|
||||||
|
}
|
||||||
|
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
|
||||||
|
|
||||||
|
surface->old_contents = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
//Check if we have any 'recycled' surfaces in memory and if so, clear them
|
||||||
|
std::vector<int> buffers_to_clear;
|
||||||
|
bool clear_all_color = true;
|
||||||
|
bool clear_depth = false;
|
||||||
|
|
||||||
|
for (int index = 0; index < 4; index++)
|
||||||
|
{
|
||||||
|
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
|
||||||
|
{
|
||||||
|
if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
|
||||||
|
clear_all_color = false;
|
||||||
|
else
|
||||||
|
buffers_to_clear.push_back(index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
||||||
|
if (ds && !ds->cleared())
|
||||||
|
{
|
||||||
|
clear_depth = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Temporarily disable pixel tests
|
||||||
|
glDisable(GL_SCISSOR_TEST);
|
||||||
|
|
||||||
|
if (clear_depth || buffers_to_clear.size() > 0)
|
||||||
|
{
|
||||||
|
GLenum mask = 0;
|
||||||
|
|
||||||
|
if (clear_depth)
|
||||||
|
{
|
||||||
|
gl_state.depth_mask(GL_TRUE);
|
||||||
|
gl_state.clear_depth(1.0);
|
||||||
|
gl_state.clear_stencil(255);
|
||||||
|
mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (clear_all_color)
|
||||||
|
mask |= GL_COLOR_BUFFER_BIT;
|
||||||
|
|
||||||
|
glClear(mask);
|
||||||
|
|
||||||
|
if (buffers_to_clear.size() > 0 && !clear_all_color)
|
||||||
|
{
|
||||||
|
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
|
||||||
|
//It is impossible for the render target to be typa A or B here (clear all would have been flagged)
|
||||||
|
for (auto &i : buffers_to_clear)
|
||||||
|
glClearBufferfv(draw_fbo.id(), i, colors);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (clear_depth)
|
||||||
|
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
|
||||||
|
|
||||||
|
ds->set_cleared();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
|
||||||
|
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
|
||||||
|
{
|
||||||
|
m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id());
|
||||||
|
ds->old_contents = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (g_cfg.video.strict_rendering_mode)
|
||||||
|
{
|
||||||
|
if (ds && ds->old_contents != nullptr)
|
||||||
|
copy_rtt_contents(ds);
|
||||||
|
|
||||||
|
for (auto &rtt : m_rtts.m_bound_render_targets)
|
||||||
|
{
|
||||||
|
if (auto surface = std::get<1>(rtt))
|
||||||
|
{
|
||||||
|
if (surface->old_contents != nullptr)
|
||||||
|
copy_rtt_contents(surface);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Old contents are one use only. Keep the depth conversion check from firing over and over
|
||||||
|
if (ds) ds->old_contents = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
glEnable(GL_SCISSOR_TEST);
|
||||||
|
|
||||||
//Load textures
|
//Load textures
|
||||||
{
|
{
|
||||||
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
std::chrono::time_point<steady_clock> textures_start = steady_clock::now();
|
||||||
|
@ -359,113 +466,6 @@ void GLGSRender::end()
|
||||||
|
|
||||||
update_draw_state();
|
update_draw_state();
|
||||||
|
|
||||||
//Check if depth buffer is bound and valid
|
|
||||||
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
|
|
||||||
auto copy_rtt_contents = [](gl::render_target *surface)
|
|
||||||
{
|
|
||||||
if (surface->get_compatible_internal_format() == surface->old_contents->get_compatible_internal_format())
|
|
||||||
{
|
|
||||||
//Copy data from old contents onto this one
|
|
||||||
//1. Clip a rectangular region defning the data
|
|
||||||
//2. Perform a GPU blit
|
|
||||||
u16 parent_w = surface->old_contents->width();
|
|
||||||
u16 parent_h = surface->old_contents->height();
|
|
||||||
u16 copy_w, copy_h;
|
|
||||||
|
|
||||||
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
|
|
||||||
glCopyImageSubData(surface->old_contents->id(), GL_TEXTURE_2D, 0, 0, 0, 0, surface->id(), GL_TEXTURE_2D, 0, 0, 0, 0, copy_w, copy_h, 1);
|
|
||||||
surface->set_cleared();
|
|
||||||
}
|
|
||||||
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
|
|
||||||
|
|
||||||
surface->old_contents = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
//Check if we have any 'recycled' surfaces in memory and if so, clear them
|
|
||||||
std::vector<int> buffers_to_clear;
|
|
||||||
bool clear_all_color = true;
|
|
||||||
bool clear_depth = false;
|
|
||||||
|
|
||||||
for (int index = 0; index < 4; index++)
|
|
||||||
{
|
|
||||||
if (std::get<0>(m_rtts.m_bound_render_targets[index]) != 0)
|
|
||||||
{
|
|
||||||
if (std::get<1>(m_rtts.m_bound_render_targets[index])->cleared())
|
|
||||||
clear_all_color = false;
|
|
||||||
else
|
|
||||||
buffers_to_clear.push_back(index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
gl::render_target *ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
|
||||||
if (ds && !ds->cleared())
|
|
||||||
{
|
|
||||||
clear_depth = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
//Temporarily disable pixel tests
|
|
||||||
glDisable(GL_SCISSOR_TEST);
|
|
||||||
|
|
||||||
if (clear_depth || buffers_to_clear.size() > 0)
|
|
||||||
{
|
|
||||||
GLenum mask = 0;
|
|
||||||
|
|
||||||
if (clear_depth)
|
|
||||||
{
|
|
||||||
gl_state.depth_mask(GL_TRUE);
|
|
||||||
gl_state.clear_depth(1.0);
|
|
||||||
gl_state.clear_stencil(255);
|
|
||||||
mask |= GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (clear_all_color)
|
|
||||||
mask |= GL_COLOR_BUFFER_BIT;
|
|
||||||
|
|
||||||
glClear(mask);
|
|
||||||
|
|
||||||
if (buffers_to_clear.size() > 0 && !clear_all_color)
|
|
||||||
{
|
|
||||||
GLfloat colors[] = { 0.f, 0.f, 0.f, 0.f };
|
|
||||||
//It is impossible for the render target to be typa A or B here (clear all would have been flagged)
|
|
||||||
for (auto &i: buffers_to_clear)
|
|
||||||
glClearBufferfv(draw_fbo.id(), i, colors);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (clear_depth)
|
|
||||||
gl_state.depth_mask(rsx::method_registers.depth_write_enabled());
|
|
||||||
|
|
||||||
ds->set_cleared();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ds && ds->old_contents != nullptr && ds->get_rsx_pitch() == ds->old_contents->get_rsx_pitch() &&
|
|
||||||
ds->old_contents->get_compatible_internal_format() == gl::texture::internal_format::rgba8)
|
|
||||||
{
|
|
||||||
m_depth_converter.run(ds->width(), ds->height(), ds->id(), ds->old_contents->id());
|
|
||||||
ds->old_contents = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (g_cfg.video.strict_rendering_mode)
|
|
||||||
{
|
|
||||||
if (ds && ds->old_contents != nullptr)
|
|
||||||
copy_rtt_contents(ds);
|
|
||||||
|
|
||||||
for (auto &rtt : m_rtts.m_bound_render_targets)
|
|
||||||
{
|
|
||||||
if (auto surface = std::get<1>(rtt))
|
|
||||||
{
|
|
||||||
if (surface->old_contents != nullptr)
|
|
||||||
copy_rtt_contents(surface);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Old contents are one use only. Keep the depth conversion check from firing over and over
|
|
||||||
if (ds) ds->old_contents = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
glEnable(GL_SCISSOR_TEST);
|
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
|
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
|
||||||
|
|
||||||
if (g_cfg.video.debug_output)
|
if (g_cfg.video.debug_output)
|
||||||
|
|
|
@ -1072,6 +1072,82 @@ void VKGSRender::end()
|
||||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> textures_start = vertex_end;
|
std::chrono::time_point<steady_clock> textures_start = vertex_end;
|
||||||
|
|
||||||
|
|
||||||
|
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
||||||
|
|
||||||
|
//Check for data casts
|
||||||
|
if (ds && ds->old_contents)
|
||||||
|
{
|
||||||
|
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
|
||||||
|
{
|
||||||
|
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
|
||||||
|
auto render_pass = m_render_passes[rp];
|
||||||
|
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
|
||||||
|
|
||||||
|
ds->old_contents = nullptr;
|
||||||
|
ds->dirty = false;
|
||||||
|
}
|
||||||
|
else if (!g_cfg.video.strict_rendering_mode)
|
||||||
|
{
|
||||||
|
//Clear this to avoid dereferencing stale ptr
|
||||||
|
ds->old_contents = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (g_cfg.video.strict_rendering_mode)
|
||||||
|
{
|
||||||
|
auto copy_rtt_contents = [&](vk::render_target* surface)
|
||||||
|
{
|
||||||
|
if (surface->info.format == surface->old_contents->info.format)
|
||||||
|
{
|
||||||
|
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
|
||||||
|
|
||||||
|
const u16 parent_w = surface->old_contents->width();
|
||||||
|
const u16 parent_h = surface->old_contents->height();
|
||||||
|
u16 copy_w, copy_h;
|
||||||
|
|
||||||
|
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
|
||||||
|
|
||||||
|
VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
|
||||||
|
VkImageLayout old_layout = surface->current_layout;
|
||||||
|
|
||||||
|
vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
|
||||||
|
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
|
||||||
|
|
||||||
|
VkImageCopy copy_rgn;
|
||||||
|
copy_rgn.srcOffset = { 0, 0, 0 };
|
||||||
|
copy_rgn.dstOffset = { 0, 0, 0 };
|
||||||
|
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
|
||||||
|
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
|
||||||
|
copy_rgn.extent = { copy_w, copy_h, 1 };
|
||||||
|
|
||||||
|
vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, ©_rgn);
|
||||||
|
vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range);
|
||||||
|
|
||||||
|
surface->dirty = false;
|
||||||
|
}
|
||||||
|
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
|
||||||
|
|
||||||
|
surface->old_contents = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
//Prepare surfaces if needed
|
||||||
|
for (auto &rtt : m_rtts.m_bound_render_targets)
|
||||||
|
{
|
||||||
|
if (auto surface = std::get<1>(rtt))
|
||||||
|
{
|
||||||
|
if (surface->old_contents != nullptr)
|
||||||
|
copy_rtt_contents(surface);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ds && ds->old_contents)
|
||||||
|
{
|
||||||
|
copy_rtt_contents(ds);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//Load textures
|
//Load textures
|
||||||
{
|
{
|
||||||
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
|
std::lock_guard<shared_mutex> lock(m_sampler_mutex);
|
||||||
|
@ -1288,80 +1364,6 @@ void VKGSRender::end()
|
||||||
//Only textures are synchronized tightly with the GPU and they have been read back above
|
//Only textures are synchronized tightly with the GPU and they have been read back above
|
||||||
vk::enter_uninterruptible();
|
vk::enter_uninterruptible();
|
||||||
|
|
||||||
auto ds = std::get<1>(m_rtts.m_bound_depth_stencil);
|
|
||||||
|
|
||||||
//Check for data casts
|
|
||||||
if (ds && ds->old_contents)
|
|
||||||
{
|
|
||||||
if (ds->old_contents->info.format == VK_FORMAT_B8G8R8A8_UNORM)
|
|
||||||
{
|
|
||||||
auto rp = vk::get_render_pass_location(VK_FORMAT_UNDEFINED, ds->info.format, 0);
|
|
||||||
auto render_pass = m_render_passes[rp];
|
|
||||||
m_depth_converter->run(*m_current_command_buffer, ds->width(), ds->height(), ds, ds->old_contents->get_view(0xAAE4, rsx::default_remap_vector), render_pass, m_framebuffers_to_clean);
|
|
||||||
|
|
||||||
ds->old_contents = nullptr;
|
|
||||||
ds->dirty = false;
|
|
||||||
}
|
|
||||||
else if (!g_cfg.video.strict_rendering_mode)
|
|
||||||
{
|
|
||||||
//Clear this to avoid dereferencing stale ptr
|
|
||||||
ds->old_contents = nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (g_cfg.video.strict_rendering_mode)
|
|
||||||
{
|
|
||||||
auto copy_rtt_contents = [&](vk::render_target* surface)
|
|
||||||
{
|
|
||||||
if (surface->info.format == surface->old_contents->info.format)
|
|
||||||
{
|
|
||||||
const VkImageAspectFlags aspect = surface->attachment_aspect_flag;
|
|
||||||
|
|
||||||
const u16 parent_w = surface->old_contents->width();
|
|
||||||
const u16 parent_h = surface->old_contents->height();
|
|
||||||
u16 copy_w, copy_h;
|
|
||||||
|
|
||||||
std::tie(std::ignore, std::ignore, copy_w, copy_h) = rsx::clip_region<u16>(parent_w, parent_h, 0, 0, surface->width(), surface->height(), true);
|
|
||||||
|
|
||||||
VkImageSubresourceRange subresource_range = { aspect, 0, 1, 0, 1 };
|
|
||||||
VkImageLayout old_layout = surface->current_layout;
|
|
||||||
|
|
||||||
vk::change_image_layout(*m_current_command_buffer, surface, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
|
|
||||||
vk::change_image_layout(*m_current_command_buffer, surface->old_contents, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, subresource_range);
|
|
||||||
|
|
||||||
VkImageCopy copy_rgn;
|
|
||||||
copy_rgn.srcOffset = { 0, 0, 0 };
|
|
||||||
copy_rgn.dstOffset = { 0, 0, 0 };
|
|
||||||
copy_rgn.dstSubresource = { aspect, 0, 0, 1 };
|
|
||||||
copy_rgn.srcSubresource = { aspect, 0, 0, 1 };
|
|
||||||
copy_rgn.extent = { copy_w, copy_h, 1 };
|
|
||||||
|
|
||||||
vkCmdCopyImage(*m_current_command_buffer, surface->old_contents->value, surface->old_contents->current_layout, surface->value, surface->current_layout, 1, ©_rgn);
|
|
||||||
vk::change_image_layout(*m_current_command_buffer, surface, old_layout, subresource_range);
|
|
||||||
|
|
||||||
surface->dirty = false;
|
|
||||||
}
|
|
||||||
//TODO: download image contents and reupload them or do a memory cast to copy memory contents if not compatible
|
|
||||||
|
|
||||||
surface->old_contents = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
//Prepare surfaces if needed
|
|
||||||
for (auto &rtt : m_rtts.m_bound_render_targets)
|
|
||||||
{
|
|
||||||
if (auto surface = std::get<1>(rtt))
|
|
||||||
{
|
|
||||||
if (surface->old_contents != nullptr)
|
|
||||||
copy_rtt_contents(surface);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ds && ds->old_contents)
|
|
||||||
{
|
|
||||||
copy_rtt_contents(ds);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 occlusion_id = 0;
|
u32 occlusion_id = 0;
|
||||||
if (m_occlusion_query_active)
|
if (m_occlusion_query_active)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue