rsx: Improve present image scanning

- Adds support for partial (letterboxed) source images by taking insets
into account.
- Bugfix for potential access violation when capturing screenshot on
vulkan
This commit is contained in:
kd-11 2020-01-17 22:44:59 +03:00 committed by kd-11
parent 7453e46a7c
commit bad4d1ff05
2 changed files with 152 additions and 170 deletions

View file

@ -5,46 +5,34 @@ GLuint GLGSRender::get_present_source(gl::present_surface_info* info, const rsx:
{ {
GLuint image = GL_NONE; GLuint image = GL_NONE;
if (auto render_target_texture = m_rtts.get_color_surface_at(info->address)) // Check the surface store first
gl::command_context cmd = { gl_state };
const auto format_bpp = get_format_block_size_in_bytes(info->format);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd,
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read);
if (!overlap_info.empty())
{ {
if (render_target_texture->last_use_tag == m_rtts.write_tag) const auto& section = overlap_info.back();
auto surface = gl::as_rtt(section.surface);
if (section.base_address >= info->address)
{ {
image = render_target_texture->raw_handle(); // Check for intentional 'borders'
} const u32 inset_offset = section.base_address - info->address;
else const u32 inset_y = inset_offset / info->pitch;
{ const u32 inset_x = (inset_offset % info->pitch) / format_bpp;
gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture) const u32 full_width = surface->get_surface_width(rsx::surface_metrics::samples) + inset_x + inset_x;
const u32 full_height = surface->get_surface_height(rsx::surface_metrics::samples) + inset_y + inset_y;
if (full_width == info->width && full_height == info->height)
{ {
// Confirmed to be the newest data source in that range surface->read_barrier(cmd);
image = render_target_texture->raw_handle(); image = section.surface->get_surface(rsx::surface_access::read)->id();
}
}
if (image) info->width = rsx::apply_resolution_scale(full_width - (inset_x + inset_x), true);
{ info->height = rsx::apply_resolution_scale(full_height - (inset_y + inset_y), true);
const auto buffer_width = rsx::apply_resolution_scale(info->width, true);
const auto buffer_height = rsx::apply_resolution_scale(info->height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
info->width, info->height,
avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
info->width = render_target_texture->width();
info->height = render_target_texture->height();
}
else
{
info->width = buffer_width;
info->height = buffer_height;
} }
} }
} }
@ -118,7 +106,24 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
gl::screen.bind(); gl::screen.bind();
gl::screen.clear(gl::buffers::color); gl::screen.clear(gl::buffers::color);
// Calculate blit coordinates GLuint image_to_flip = GL_NONE;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
// Find the source image
gl::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
image_to_flip = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
}
// Calculate blit coordinates
coordi aspect_ratio; coordi aspect_ratio;
sizei csize(m_frame->client_width(), m_frame->client_height()); sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize; sizei new_size = csize;
@ -143,20 +148,8 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
aspect_ratio.size = new_size; aspect_ratio.size = new_size;
if (info.buffer < display_buffers_count && buffer_width && buffer_height) if (image_to_flip)
{ {
// Find the source image
gl::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
const GLuint image = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
if (m_frame->screenshot_toggle == true) if (m_frame->screenshot_toggle == true)
{ {
m_frame->screenshot_toggle = false; m_frame->screenshot_toggle = false;
@ -167,9 +160,9 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
pack_settings.apply(); pack_settings.apply();
if (gl::get_driver_caps().ARB_dsa_supported) if (gl::get_driver_caps().ARB_dsa_supported)
glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data()); glGetTextureImage(image_to_flip, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data());
else else
glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data()); glGetTextureImageEXT(image_to_flip, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data());
if (GLenum err; (err = glGetError()) != GL_NO_ERROR) if (GLenum err; (err = glGetError()) != GL_NO_ERROR)
LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err); LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err);
@ -184,7 +177,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
// Blit source image to the screen // Blit source image to the screen
m_flip_fbo.recreate(); m_flip_fbo.recreate();
m_flip_fbo.bind(); m_flip_fbo.bind();
m_flip_fbo.color = image; m_flip_fbo.color = image_to_flip;
m_flip_fbo.read_buffer(m_flip_fbo.color); m_flip_fbo.read_buffer(m_flip_fbo.color);
m_flip_fbo.draw_buffer(m_flip_fbo.color); m_flip_fbo.draw_buffer(m_flip_fbo.color);
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear); m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
@ -195,7 +188,7 @@ void GLGSRender::flip(const rsx::display_flip_info_t& info)
const bool limited_range = !g_cfg.video.full_rgb_range_output; const bool limited_range = !g_cfg.video.full_rgb_range_output;
gl::screen.bind(); gl::screen.bind();
m_video_output_pass.run(areau(aspect_ratio), image, gamma, limited_range); m_video_output_pass.run(areau(aspect_ratio), image_to_flip, gamma, limited_range);
} }
} }

View file

@ -265,51 +265,40 @@ vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const
{ {
vk::image* image_to_flip = nullptr; vk::image* image_to_flip = nullptr;
if (auto render_target_texture = m_rtts.get_color_surface_at(info->address)) // Check the surface store first
const auto format_bpp = get_format_block_size_in_bytes(info->format);
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer,
info->address, info->width, info->height, info->pitch, format_bpp, rsx::surface_access::read);
if (!overlap_info.empty())
{ {
if (render_target_texture->last_use_tag == m_rtts.write_tag) const auto& section = overlap_info.back();
{ auto surface = vk::as_rtt(section.surface);
image_to_flip = render_target_texture;
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image_to_flip = render_target_texture;
}
}
if (image_to_flip) if (section.base_address >= info->address)
{ {
const auto buffer_width = rsx::apply_resolution_scale(info->width, true); // Check for intentional 'borders'
const auto buffer_height = rsx::apply_resolution_scale(info->height, true); const u32 inset_offset = section.base_address - info->address;
const u32 inset_y = inset_offset / info->pitch;
const u32 inset_x = (inset_offset % info->pitch) / format_bpp;
if (buffer_width > render_target_texture->width() || const u32 full_width = surface->get_surface_width(rsx::surface_metrics::samples) + inset_x + inset_x;
buffer_height > render_target_texture->height()) const u32 full_height = surface->get_surface_height(rsx::surface_metrics::samples) + inset_y + inset_y;
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
info->width, info->height,
avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
info->width = render_target_texture->width(); if (full_width == info->width && full_height == info->height)
info->height = render_target_texture->height();
}
else
{ {
info->width = buffer_width; surface->read_barrier(*m_current_command_buffer);
info->height = buffer_height; image_to_flip = section.surface->get_surface(rsx::surface_access::read);
info->width = rsx::apply_resolution_scale(full_width - (inset_x + inset_x), true);
info->height = rsx::apply_resolution_scale(full_height - (inset_y + inset_y), true);
} }
} }
} }
else if (auto surface = m_texture_cache.find_texture_from_dimensions<true>(info->address, info->format, info->width, info->height)) else if (auto surface = m_texture_cache.find_texture_from_dimensions<true>(info->address, info->format, info->width, info->height))
{ {
//Hack - this should be the first location to check for output // Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display // The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture(); image_to_flip = surface->get_raw_texture();
} }
@ -426,32 +415,23 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
buffer_pitch = buffer_width * 4; buffer_pitch = buffer_width * 4;
} }
coordi aspect_ratio; // Scan memory for required data. This is done early to optimize waiting for the driver image acquire below.
vk::image* image_to_flip = nullptr;
sizei csize = m_swapchain_dims; if (info.buffer < display_buffers_count && buffer_width && buffer_height)
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{ {
const double aq = 1. * buffer_width / buffer_height; vk::present_surface_info present_info;
const double rq = 1. * new_size.width / new_size.height; present_info.width = buffer_width;
const double q = aq / rq; present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
if (q > 1.0) image_to_flip = get_present_source(&present_info, avconfig);
{ buffer_width = present_info.width;
new_size.height = static_cast<int>(new_size.height / q); buffer_height = present_info.height;
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
} }
aspect_ratio.size = new_size; // Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
//Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
verify(HERE), m_current_frame->present_image == UINT32_MAX; verify(HERE), m_current_frame->present_image == UINT32_MAX;
verify(HERE), m_current_frame->swap_command_buffer == nullptr; verify(HERE), m_current_frame->swap_command_buffer == nullptr;
@ -463,14 +443,14 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
case VK_TIMEOUT: case VK_TIMEOUT:
case VK_NOT_READY: case VK_NOT_READY:
{ {
//In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images. // In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images.
//This means that any acquired images have to be released // This means that any acquired images have to be released
//before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available // before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available
//This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image // This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image
//Found on AMD Crimson 17.7.2 // Found on AMD Crimson 17.7.2
//Whatever returned from status, this is now a spin // Whatever returned from status, this is now a spin
timeout = 0ull; timeout = 0ull;
check_present_status(); check_present_status();
continue; continue;
@ -488,26 +468,35 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
} }
} }
//Confirm that the driver did not silently fail // Confirm that the driver did not silently fail
verify(HERE), m_current_frame->present_image != UINT32_MAX; verify(HERE), m_current_frame->present_image != UINT32_MAX;
//Blit contents to screen.. // Calculate output dimensions. Done after swapchain acquisition in case it was recreated.
vk::image* image_to_flip = nullptr; coordi aspect_ratio;
sizei csize = m_swapchain_dims;
sizei new_size = csize;
if (info.buffer < display_buffers_count && buffer_width && buffer_height) if (!g_cfg.video.stretch_to_display_area)
{ {
vk::present_surface_info present_info; const double aq = 1. * buffer_width / buffer_height;
present_info.width = buffer_width; const double rq = 1. * new_size.width / new_size.height;
present_info.height = buffer_height; const double q = aq / rq;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
image_to_flip = get_present_source(&present_info, avconfig); if (q > 1.0)
buffer_width = present_info.width; {
buffer_height = present_info.height; new_size.height = static_cast<int>(new_size.height / q);
} aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
// Blit contents to screen..
VkImage target_image = m_swapchain->get_image(m_current_frame->present_image); VkImage target_image = m_swapchain->get_image(m_current_frame->present_image);
const auto present_layout = m_swapchain->get_optimal_present_layout(); const auto present_layout = m_swapchain->get_optimal_present_layout();
@ -558,11 +547,48 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
direct_fbo->release(); direct_fbo->release();
} }
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
const size_t sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
VkBufferImageCopy copy_info;
copy_info.bufferOffset = 0;
copy_info.bufferRowLength = 0;
copy_info.bufferImageHeight = 0;
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.baseArrayLayer = 0;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.mipLevel = 0;
copy_info.imageOffset.x = 0;
copy_info.imageOffset.y = 0;
copy_info.imageOffset.z = 0;
copy_info.imageExtent.width = buffer_width;
copy_info.imageExtent.height = buffer_height;
copy_info.imageExtent.depth = 1;
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info);
image_to_flip->pop_layout(*m_current_command_buffer);
flush_command_queue(true);
auto src = sshot_vkbuf.map(0, sshot_size);
std::vector<u8> sshot_frame(sshot_size);
memcpy(sshot_frame.data(), src, sshot_size);
sshot_vkbuf.unmap();
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
} }
else else
{ {
//No draw call was issued! // No draw call was issued!
//TODO: Upload raw bytes from cpu for rendering // TODO: Upload raw bytes from cpu for rendering
VkClearColorValue clear_black {}; VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range); vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range); vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
@ -570,43 +596,6 @@ void VKGSRender::flip(const rsx::display_flip_info_t& info)
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
} }
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
const size_t sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
VkBufferImageCopy copy_info;
copy_info.bufferOffset = 0;
copy_info.bufferRowLength = 0;
copy_info.bufferImageHeight = 0;
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.baseArrayLayer = 0;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.mipLevel = 0;
copy_info.imageOffset.x = 0;
copy_info.imageOffset.y = 0;
copy_info.imageOffset.z = 0;
copy_info.imageExtent.width = buffer_width;
copy_info.imageExtent.height = buffer_height;
copy_info.imageExtent.depth = 1;
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info);
image_to_flip->pop_layout(*m_current_command_buffer);
flush_command_queue(true);
auto src = sshot_vkbuf.map(0, sshot_size);
std::vector<u8> sshot_frame(sshot_size);
memcpy(sshot_frame.data(), src, sshot_size);
sshot_vkbuf.unmap();
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible()); const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible());
if (g_cfg.video.overlay || has_overlay) if (g_cfg.video.overlay || has_overlay)
{ {