rsx: Refactor out complex present code into separate files

- Also restructures present code to have image lookup in a separate
re-usable function.
This commit is contained in:
kd-11 2020-01-17 19:24:33 +03:00 committed by kd-11
parent b07b5c9005
commit 7453e46a7c
11 changed files with 1293 additions and 1213 deletions

View file

@ -379,6 +379,7 @@ target_sources(rpcs3_emu PRIVATE
RSX/GL/GLFragmentProgram.cpp RSX/GL/GLFragmentProgram.cpp
RSX/GL/GLGSRender.cpp RSX/GL/GLGSRender.cpp
RSX/GL/GLHelpers.cpp RSX/GL/GLHelpers.cpp
RSX/GL/GLPresent.cpp
RSX/GL/GLRenderTargets.cpp RSX/GL/GLRenderTargets.cpp
RSX/GL/GLTexture.cpp RSX/GL/GLTexture.cpp
RSX/GL/GLVertexBuffers.cpp RSX/GL/GLVertexBuffers.cpp
@ -397,6 +398,7 @@ if(TARGET 3rdparty_vulkan)
RSX/VK/VKGSRender.cpp RSX/VK/VKGSRender.cpp
RSX/VK/VKHelpers.cpp RSX/VK/VKHelpers.cpp
RSX/VK/VKMemAlloc.cpp RSX/VK/VKMemAlloc.cpp
RSX/VK/VKPresent.cpp
RSX/VK/VKProgramPipeline.cpp RSX/VK/VKProgramPipeline.cpp
RSX/VK/VKRenderPass.cpp RSX/VK/VKRenderPass.cpp
RSX/VK/VKResolveHelper.cpp RSX/VK/VKResolveHelper.cpp

View file

@ -1482,266 +1482,6 @@ void GLGSRender::update_draw_state()
m_frame_stats.setup_time += m_profiler.duration(); m_frame_stats.setup_time += m_profiler.duration();
} }
void GLGSRender::flip(const rsx::display_flip_info_t& info)
{
if (info.skip_frame)
{
m_frame->flip(m_context, true);
rsx::thread::flip(info);
return;
}
u32 buffer_width = display_buffers[info.buffer].width;
u32 buffer_height = display_buffers[info.buffer].height;
u32 buffer_pitch = display_buffers[info.buffer].pitch;
u32 av_format;
const auto avconfig = g_fxo->get<rsx::avconf>();
if (avconfig->state)
{
av_format = avconfig->get_compatible_gcm_format();
if (!buffer_pitch)
buffer_pitch = buffer_width * avconfig->get_bpp();
buffer_width = std::min(buffer_width, avconfig->resolution_x);
buffer_height = std::min(buffer_height, avconfig->resolution_y);
}
else
{
av_format = CELL_GCM_TEXTURE_A8R8G8B8;
if (!buffer_pitch)
buffer_pitch = buffer_width * 4;
}
// Disable scissor test (affects blit, clear, etc)
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
// Clear the window background to black
gl_state.clear_color(0, 0, 0, 0);
gl::screen.bind();
gl::screen.clear(gl::buffers::color);
// Calculate blit coordinates
coordi aspect_ratio;
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
// Find the source image
const u32 absolute_address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
GLuint image = GL_NONE;
if (auto render_target_texture = m_rtts.get_color_surface_at(absolute_address))
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image = render_target_texture->raw_handle();
}
else
{
gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image = render_target_texture->raw_handle();
}
}
if (image)
{
buffer_width = rsx::apply_resolution_scale(buffer_width, true);
buffer_height = rsx::apply_resolution_scale(buffer_height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
display_buffers[info.buffer].width, display_buffers[info.buffer].height, avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
}
}
}
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions<true>(absolute_address, av_format, buffer_width, buffer_height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
if (const auto tex = surface->get_raw_texture(); tex) image = tex->id();
}
if (!image)
{
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
gl::pixel_unpack_settings unpack_settings;
unpack_settings.alignment(1).row_length(buffer_pitch / 4);
if (!m_flip_tex_color || m_flip_tex_color->size2D() != sizei{ static_cast<int>(buffer_width), static_cast<int>(buffer_height) })
{
m_flip_tex_color = std::make_unique<gl::texture>(GL_TEXTURE_2D, buffer_width, buffer_height, 1, 1, GL_RGBA8);
}
gl::command_context cmd{ gl_state };
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::read);
m_flip_tex_color->copy_from(vm::base(absolute_address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings);
image = m_flip_tex_color->id();
}
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
std::vector<u8> sshot_frame(buffer_height * buffer_width * 4);
gl::pixel_pack_settings pack_settings{};
pack_settings.apply();
if (gl::get_driver_caps().ARB_dsa_supported)
glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data());
else
glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data());
if (GLenum err; (err = glGetError()) != GL_NO_ERROR)
LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err);
else
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
areai screen_area = coordi({}, { static_cast<int>(buffer_width), static_cast<int>(buffer_height) });
if (g_cfg.video.full_rgb_range_output && rsx::fcmp(avconfig->gamma, 1.f))
{
// Blit source image to the screen
m_flip_fbo.recreate();
m_flip_fbo.bind();
m_flip_fbo.color = image;
m_flip_fbo.read_buffer(m_flip_fbo.color);
m_flip_fbo.draw_buffer(m_flip_fbo.color);
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
}
else
{
const f32 gamma = avconfig->gamma;
const bool limited_range = !g_cfg.video.full_rgb_range_output;
gl::screen.bind();
m_video_output_pass.run(areau(aspect_ratio), image, gamma, limited_range);
}
}
if (m_overlay_manager)
{
if (m_overlay_manager->has_dirty())
{
m_overlay_manager->lock();
std::vector<u32> uids_to_dispose;
uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());
for (const auto& view : m_overlay_manager->get_dirty())
{
m_ui_renderer.remove_temp_resources(view->uid);
uids_to_dispose.push_back(view->uid);
}
m_overlay_manager->unlock();
m_overlay_manager->dispose(uids_to_dispose);
}
if (m_overlay_manager->has_visible())
{
gl::screen.bind();
// Lock to avoid modification during run-update chain
std::lock_guard lock(*m_overlay_manager);
for (const auto& view : m_overlay_manager->get_views())
{
m_ui_renderer.run(areau(aspect_ratio), 0, *view.get());
}
}
}
if (g_cfg.video.overlay)
{
gl::screen.bind();
glViewport(0, 0, m_frame->client_width(), m_frame->client_height());
m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", info.stats.draw_calls));
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", info.stats.setup_time));
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", info.stats.vertex_upload_time));
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", info.stats.textures_upload_time));
m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", info.stats.draw_exec_time));
const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_gl_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = static_cast<u32>(ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100));
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
m_frame->flip(m_context);
rsx::thread::flip(info);
// Cleanup
m_gl_texture_cache.on_frame_end();
m_vertex_cache->purge();
auto removed_textures = m_rtts.free_invalidated();
m_framebuffer_cache.remove_if([&](auto& fbo)
{
if (fbo.unused_check_count() >= 2) return true; // Remove if stale
if (fbo.references_any(removed_textures)) return true; // Remove if any of the attachments is invalid
return false;
});
if (m_draw_fbo && !m_rtts_dirty)
{
// Always restore the active framebuffer
m_draw_fbo->bind();
set_viewport();
set_scissor(!!(m_graphics_state & rsx::pipeline_state::scissor_setup_clipped));
}
}
bool GLGSRender::on_access_violation(u32 address, bool is_writing) bool GLGSRender::on_access_violation(u32 address, bool is_writing)
{ {
const bool can_flush = (std::this_thread::get_id() == m_rsx_thread); const bool can_flush = (std::this_thread::get_id() == m_rsx_thread);
@ -1762,7 +1502,7 @@ bool GLGSRender::on_access_violation(u32 address, bool is_writing)
if (result.num_flushable > 0) if (result.num_flushable > 0)
{ {
work_item &task = post_flush_request(address, result); auto &task = post_flush_request(address, result);
vm::temporary_unlock(); vm::temporary_unlock();
task.producer_wait(); task.producer_wait();
@ -1802,9 +1542,9 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
{ {
std::lock_guard lock(queue_guard); std::lock_guard lock(queue_guard);
work_queue.remove_if([](work_item &q) { return q.received; }); work_queue.remove_if([](auto &q) { return q.received; });
for (work_item& q : work_queue) for (auto& q : work_queue)
{ {
if (q.processed) continue; if (q.processed) continue;
@ -1843,11 +1583,11 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
} }
} }
work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data) gl::work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data)
{ {
std::lock_guard lock(queue_guard); std::lock_guard lock(queue_guard);
work_item &result = work_queue.emplace_back(); auto &result = work_queue.emplace_back();
result.address_to_flush = address; result.address_to_flush = address;
result.section_data = std::move(flush_data); result.section_data = std::move(flush_data);
return result; return result;

View file

@ -33,7 +33,6 @@ namespace gl
u32 volatile_mapping_offset; u32 volatile_mapping_offset;
std::optional<std::tuple<GLenum, u32> > index_info; std::optional<std::tuple<GLenum, u32> > index_info;
}; };
}
struct work_item struct work_item
{ {
@ -55,6 +54,16 @@ struct work_item
} }
}; };
struct present_surface_info
{
u32 address;
u32 format;
u32 width;
u32 height;
u32 pitch;
};
}
class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{ {
private: private:
@ -103,7 +112,7 @@ private:
gl::video_out_calibration_pass m_video_output_pass; gl::video_out_calibration_pass m_video_output_pass;
shared_mutex queue_guard; shared_mutex queue_guard;
std::list<work_item> work_queue; std::list<gl::work_item> work_queue;
GLProgramBuffer m_prog_buffer; GLProgramBuffer m_prog_buffer;
draw_context_t m_decompiler_context; draw_context_t m_decompiler_context;
@ -145,12 +154,14 @@ private:
void update_draw_state(); void update_draw_state();
GLuint get_present_source(gl::present_surface_info* info, const rsx::avconf* avconfig);
public: public:
void read_buffers(); void read_buffers();
void set_viewport(); void set_viewport();
void set_scissor(bool clip_viewport); void set_scissor(bool clip_viewport);
work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data); gl::work_item& post_flush_request(u32 address, gl::texture_cache::thrashed_set& flush_data);
bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override; bool scaled_image_from_memory(rsx::blit_src_info& src_info, rsx::blit_dst_info& dst_info, bool interpolate) override;

View file

@ -0,0 +1,283 @@
#include "stdafx.h"
#include "GLGSRender.h"
GLuint GLGSRender::get_present_source(gl::present_surface_info* info, const rsx::avconf* avconfig)
{
GLuint image = GL_NONE;
if (auto render_target_texture = m_rtts.get_color_surface_at(info->address))
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image = render_target_texture->raw_handle();
}
else
{
gl::command_context cmd = { gl_state };
const auto overlap_info = m_rtts.get_merged_texture_memory_region(cmd, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image = render_target_texture->raw_handle();
}
}
if (image)
{
const auto buffer_width = rsx::apply_resolution_scale(info->width, true);
const auto buffer_height = rsx::apply_resolution_scale(info->height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
info->width, info->height,
avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
info->width = render_target_texture->width();
info->height = render_target_texture->height();
}
else
{
info->width = buffer_width;
info->height = buffer_height;
}
}
}
else if (auto surface = m_gl_texture_cache.find_texture_from_dimensions<true>(info->address, info->format, info->width, info->height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
if (const auto tex = surface->get_raw_texture(); tex) image = tex->id();
}
if (!image)
{
LOG_WARNING(RSX, "Flip texture was not found in cache. Uploading surface from CPU");
gl::pixel_unpack_settings unpack_settings;
unpack_settings.alignment(1).row_length(info->pitch / 4);
if (!m_flip_tex_color || m_flip_tex_color->size2D() != sizei{ static_cast<int>(info->width), static_cast<int>(info->height) })
{
m_flip_tex_color = std::make_unique<gl::texture>(GL_TEXTURE_2D, info->width, info->height, 1, 1, GL_RGBA8);
}
gl::command_context cmd{ gl_state };
const auto range = utils::address_range::start_length(info->address, info->pitch * info->height);
m_gl_texture_cache.invalidate_range(cmd, range, rsx::invalidation_cause::read);
m_flip_tex_color->copy_from(vm::base(info->address), gl::texture::format::bgra, gl::texture::type::uint_8_8_8_8, unpack_settings);
image = m_flip_tex_color->id();
}
return image;
}
void GLGSRender::flip(const rsx::display_flip_info_t& info)
{
if (info.skip_frame)
{
m_frame->flip(m_context, true);
rsx::thread::flip(info);
return;
}
u32 buffer_width = display_buffers[info.buffer].width;
u32 buffer_height = display_buffers[info.buffer].height;
u32 buffer_pitch = display_buffers[info.buffer].pitch;
u32 av_format;
const auto avconfig = g_fxo->get<rsx::avconf>();
if (avconfig->state)
{
av_format = avconfig->get_compatible_gcm_format();
if (!buffer_pitch)
buffer_pitch = buffer_width * avconfig->get_bpp();
buffer_width = std::min(buffer_width, avconfig->resolution_x);
buffer_height = std::min(buffer_height, avconfig->resolution_y);
}
else
{
av_format = CELL_GCM_TEXTURE_A8R8G8B8;
if (!buffer_pitch)
buffer_pitch = buffer_width * 4;
}
// Disable scissor test (affects blit, clear, etc)
gl_state.enable(GL_FALSE, GL_SCISSOR_TEST);
// Clear the window background to black
gl_state.clear_color(0, 0, 0, 0);
gl::screen.bind();
gl::screen.clear(gl::buffers::color);
// Calculate blit coordinates
coordi aspect_ratio;
sizei csize(m_frame->client_width(), m_frame->client_height());
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
// Find the source image
gl::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
const GLuint image = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
std::vector<u8> sshot_frame(buffer_height * buffer_width * 4);
gl::pixel_pack_settings pack_settings{};
pack_settings.apply();
if (gl::get_driver_caps().ARB_dsa_supported)
glGetTextureImage(image, 0, GL_BGRA, GL_UNSIGNED_BYTE, buffer_height * buffer_width * 4, sshot_frame.data());
else
glGetTextureImageEXT(image, GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_BYTE, sshot_frame.data());
if (GLenum err; (err = glGetError()) != GL_NO_ERROR)
LOG_ERROR(GENERAL, "[Screenshot] Failed to capture image: 0x%x", err);
else
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
areai screen_area = coordi({}, { static_cast<int>(buffer_width), static_cast<int>(buffer_height) });
if (g_cfg.video.full_rgb_range_output && rsx::fcmp(avconfig->gamma, 1.f))
{
// Blit source image to the screen
m_flip_fbo.recreate();
m_flip_fbo.bind();
m_flip_fbo.color = image;
m_flip_fbo.read_buffer(m_flip_fbo.color);
m_flip_fbo.draw_buffer(m_flip_fbo.color);
m_flip_fbo.blit(gl::screen, screen_area, areai(aspect_ratio).flipped_vertical(), gl::buffers::color, gl::filter::linear);
}
else
{
const f32 gamma = avconfig->gamma;
const bool limited_range = !g_cfg.video.full_rgb_range_output;
gl::screen.bind();
m_video_output_pass.run(areau(aspect_ratio), image, gamma, limited_range);
}
}
if (m_overlay_manager)
{
if (m_overlay_manager->has_dirty())
{
m_overlay_manager->lock();
std::vector<u32> uids_to_dispose;
uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());
for (const auto& view : m_overlay_manager->get_dirty())
{
m_ui_renderer.remove_temp_resources(view->uid);
uids_to_dispose.push_back(view->uid);
}
m_overlay_manager->unlock();
m_overlay_manager->dispose(uids_to_dispose);
}
if (m_overlay_manager->has_visible())
{
gl::screen.bind();
// Lock to avoid modification during run-update chain
std::lock_guard lock(*m_overlay_manager);
for (const auto& view : m_overlay_manager->get_views())
{
m_ui_renderer.run(areau(aspect_ratio), 0, *view.get());
}
}
}
if (g_cfg.video.overlay)
{
gl::screen.bind();
glViewport(0, 0, m_frame->client_width(), m_frame->client_height());
m_text_printer.print_text(0, 0, m_frame->client_width(), m_frame->client_height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_printer.print_text(0, 18, m_frame->client_width(), m_frame->client_height(), fmt::format("draw calls: %16d", info.stats.draw_calls));
m_text_printer.print_text(0, 36, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call setup: %11dus", info.stats.setup_time));
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), fmt::format("vertex upload time: %8dus", info.stats.vertex_upload_time));
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), fmt::format("textures upload time: %6dus", info.stats.textures_upload_time));
m_text_printer.print_text(0, 90, m_frame->client_width(), m_frame->client_height(), fmt::format("draw call execution: %7dus", info.stats.draw_exec_time));
const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_gl_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_gl_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_gl_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = static_cast<u32>(ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100));
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), fmt::format("Unreleased textures: %7d", num_dirty_textures));
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Texture memory: %12dM", texture_memory_size));
m_text_printer.print_text(0, 162, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %12d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
m_frame->flip(m_context);
rsx::thread::flip(info);
// Cleanup
m_gl_texture_cache.on_frame_end();
m_vertex_cache->purge();
auto removed_textures = m_rtts.free_invalidated();
m_framebuffer_cache.remove_if([&](auto& fbo)
{
if (fbo.unused_check_count() >= 2) return true; // Remove if stale
if (fbo.references_any(removed_textures)) return true; // Remove if any of the attachments is invalid
return false;
});
if (m_draw_fbo && !m_rtts_dirty)
{
// Always restore the active framebuffer
m_draw_fbo->bind();
set_viewport();
set_scissor(!!(m_graphics_state & rsx::pipeline_state::scissor_setup_clipped));
}
}

View file

@ -873,7 +873,7 @@ void VKGSRender::check_heap_status(u32 flags)
{ {
m_profiler.start(); m_profiler.start();
frame_context_t *target_frame = nullptr; vk::frame_context_t *target_frame = nullptr;
if (!m_queued_frames.empty()) if (!m_queued_frames.empty())
{ {
if (m_current_frame != &m_aux_frame_context) if (m_current_frame != &m_aux_frame_context)
@ -2224,196 +2224,6 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, void* args)
} }
} }
void VKGSRender::advance_queued_frames()
{
// Check all other frames for completion and clear resources
check_present_status();
//m_rtts storage is double buffered and should be safe to tag on frame boundary
m_rtts.free_invalidated();
//texture cache is also double buffered to prevent use-after-free
m_texture_cache.on_frame_end();
m_samplers_dirty.store(true);
vk::remove_unused_framebuffers();
m_vertex_cache->purge();
m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(),
m_vertex_env_ring_info.get_current_put_pos_minus_one(),
m_fragment_env_ring_info.get_current_put_pos_minus_one(),
m_vertex_layout_ring_info.get_current_put_pos_minus_one(),
m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(),
m_fragment_constants_ring_info.get_current_put_pos_minus_one(),
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one());
m_queued_frames.push_back(m_current_frame);
verify(HERE), m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES;
m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES;
m_current_frame = &frame_context_storage[m_current_queue_index];
m_current_frame->flags |= frame_context_state::dirty;
vk::advance_frame_counter();
}
void VKGSRender::present(frame_context_t *ctx)
{
verify(HERE), ctx->present_image != UINT32_MAX;
// Partial CS flush
ctx->swap_command_buffer->flush();
if (!swapchain_unavailable)
{
switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image))
{
case VK_SUCCESS:
break;
case VK_SUBOPTIMAL_KHR:
should_reinitialize_swapchain = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
swapchain_unavailable = true;
break;
default:
vk::die_with_error(HERE, error);
}
}
// Presentation image released; reset value
ctx->present_image = UINT32_MAX;
}
void VKGSRender::queue_swap_request()
{
verify(HERE), !m_current_frame->swap_command_buffer;
m_current_frame->swap_command_buffer = m_current_command_buffer;
if (m_swapchain->is_headless())
{
m_swapchain->end_frame(*m_current_command_buffer, m_current_frame->present_image);
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
}
else
{
close_and_submit_command_buffer(m_current_command_buffer->submit_fence,
m_current_frame->acquire_signal_semaphore,
m_current_frame->present_wait_semaphore,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
}
// Signal pending state as the command queue is now closed
m_current_frame->swap_command_buffer->pending = true;
// Set up a present request for this frame as well
present(m_current_frame);
// Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
m_current_command_buffer->reset();
// Set up new pointers for the next frame
advance_queued_frames();
open_command_buffer();
}
void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources)
{
verify(HERE), ctx->swap_command_buffer;
if (ctx->swap_command_buffer->pending)
{
// Perform hard swap here
if (ctx->swap_command_buffer->wait(FRAME_PRESENT_TIMEOUT) != VK_SUCCESS)
{
// Lost surface/device, release swapchain
swapchain_unavailable = true;
}
free_resources = true;
}
if (free_resources)
{
if (g_cfg.video.overlay)
{
m_text_writer->reset_descriptors();
}
if (m_overlay_manager && m_overlay_manager->has_dirty())
{
m_overlay_manager->lock();
std::vector<u32> uids_to_dispose;
uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());
for (const auto& view : m_overlay_manager->get_dirty())
{
m_ui_renderer->remove_temp_resources(view->uid);
uids_to_dispose.push_back(view->uid);
}
m_overlay_manager->unlock();
m_overlay_manager->dispose(uids_to_dispose);
}
vk::reset_global_resources();
m_attachment_clear_pass->free_resources();
m_depth_converter->free_resources();
m_ui_renderer->free_resources();
m_video_output_pass->free_resources();
ctx->buffer_views_to_clean.clear();
if (ctx->last_frame_sync_time > m_last_heap_sync_time)
{
m_last_heap_sync_time = ctx->last_frame_sync_time;
//Heap cleanup; deallocates memory consumed by the frame if it is still held
m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr;
m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr;
m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr;
m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr;
m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr;
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_attrib_ring_info.notify();
m_vertex_env_ring_info.notify();
m_fragment_env_ring_info.notify();
m_fragment_constants_ring_info.notify();
m_transform_constants_ring_info.notify();
m_vertex_layout_ring_info.notify();
m_fragment_texture_params_ring_info.notify();
m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify();
}
}
ctx->swap_command_buffer = nullptr;
// Remove from queued list
while (!m_queued_frames.empty())
{
auto frame = m_queued_frames.front();
m_queued_frames.pop_front();
if (frame == ctx)
{
break;
}
}
vk::advance_completed_frame_counter();
}
void VKGSRender::do_local_task(rsx::FIFO_state state) void VKGSRender::do_local_task(rsx::FIFO_state state)
{ {
if (m_queue_status & flush_queue_state::deadlock) if (m_queue_status & flush_queue_state::deadlock)
@ -3115,483 +2925,6 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
check_zcull_status(true); check_zcull_status(true);
} }
void VKGSRender::reinitialize_swapchain()
{
m_swapchain_dims.width = m_frame->client_width();
m_swapchain_dims.height = m_frame->client_height();
// Reject requests to acquire new swapchain if the window is minimized
// The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized
// However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state
if (m_swapchain_dims.width == 0 || m_swapchain_dims.height == 0)
{
swapchain_unavailable = true;
return;
}
// NOTE: This operation will create a hard sync point
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
m_current_command_buffer->pending = true;
m_current_command_buffer->reset();
for (auto &ctx : frame_context_storage)
{
if (ctx.present_image == UINT32_MAX)
continue;
// Release present image by presenting it
frame_context_cleanup(&ctx, true);
}
// Drain all the queues
vkDeviceWaitIdle(*m_device);
// Rebuild swapchain. Old swapchain destruction is handled by the init_swapchain call
if (!m_swapchain->init(m_swapchain_dims.width, m_swapchain_dims.height))
{
LOG_WARNING(RSX, "Swapchain initialization failed. Request ignored [%dx%d]", m_swapchain_dims.width, m_swapchain_dims.height);
swapchain_unavailable = true;
open_command_buffer();
return;
}
// Prepare new swapchain images for use
open_command_buffer();
for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
{
const auto target_layout = m_swapchain->get_optimal_present_layout();
const auto target_image = m_swapchain->get_image(i);
VkClearColorValue clear_color{};
VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, target_layout, range);
}
//Will have to block until rendering is completed
vk::fence resize_fence(*m_device);
//Flush the command buffer
close_and_submit_command_buffer(&resize_fence);
vk::wait_for_fence(&resize_fence);
m_current_command_buffer->reset();
open_command_buffer();
swapchain_unavailable = false;
should_reinitialize_swapchain = false;
}
void VKGSRender::flip(const rsx::display_flip_info_t& info)
{
// Check swapchain condition/status
if (!m_swapchain->supports_automatic_wm_reports())
{
if (m_swapchain_dims.width != m_frame->client_width() ||
m_swapchain_dims.height != m_frame->client_height())
{
swapchain_unavailable = true;
}
}
if (swapchain_unavailable || should_reinitialize_swapchain)
{
reinitialize_swapchain();
}
m_profiler.start();
if (m_current_frame == &m_aux_frame_context)
{
m_current_frame = &frame_context_storage[m_current_queue_index];
if (m_current_frame->swap_command_buffer)
{
// Its possible this flip request is triggered by overlays and the flip queue is in undefined state
frame_context_cleanup(m_current_frame, true);
}
// Swap aux storage and current frame; aux storage should always be ready for use at all times
m_current_frame->swap_storage(m_aux_frame_context);
m_current_frame->grab_resources(m_aux_frame_context);
}
else if (m_current_frame->swap_command_buffer)
{
if (info.stats.draw_calls > 0)
{
// This can be 'legal' if the window was being resized and no polling happened because of swapchain_unavailable flag
LOG_ERROR(RSX, "Possible data corruption on frame context storage detected");
}
// There were no draws and back-to-back flips happened
frame_context_cleanup(m_current_frame, true);
}
if (info.skip_frame || swapchain_unavailable)
{
if (!info.skip_frame)
{
verify(HERE), swapchain_unavailable;
// Perform a mini-flip here without invoking present code
m_current_frame->swap_command_buffer = m_current_command_buffer;
flush_command_queue(true);
vk::advance_frame_counter();
frame_context_cleanup(m_current_frame, true);
}
m_frame->flip(m_context);
rsx::thread::flip(info);
return;
}
u32 buffer_width = display_buffers[info.buffer].width;
u32 buffer_height = display_buffers[info.buffer].height;
u32 buffer_pitch = display_buffers[info.buffer].pitch;
u32 av_format;
const auto avconfig = g_fxo->get<rsx::avconf>();
if (avconfig->state)
{
av_format = avconfig->get_compatible_gcm_format();
if (!buffer_pitch)
buffer_pitch = buffer_width * avconfig->get_bpp();
buffer_width = std::min(buffer_width, avconfig->resolution_x);
buffer_height = std::min(buffer_height, avconfig->resolution_y);
}
else
{
av_format = CELL_GCM_TEXTURE_A8R8G8B8;
if (!buffer_pitch)
buffer_pitch = buffer_width * 4;
}
coordi aspect_ratio;
sizei csize = m_swapchain_dims;
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
//Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
verify(HERE), m_current_frame->present_image == UINT32_MAX;
verify(HERE), m_current_frame->swap_command_buffer == nullptr;
u64 timeout = m_swapchain->get_swap_image_count() <= VK_MAX_ASYNC_FRAMES? 0ull: 100000000ull;
while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image))
{
switch (status)
{
case VK_TIMEOUT:
case VK_NOT_READY:
{
//In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images.
//This means that any acquired images have to be released
//before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available
//This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image
//Found on AMD Crimson 17.7.2
//Whatever returned from status, this is now a spin
timeout = 0ull;
check_present_status();
continue;
}
case VK_SUBOPTIMAL_KHR:
should_reinitialize_swapchain = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
LOG_WARNING(RSX, "vkAcquireNextImageKHR failed with VK_ERROR_OUT_OF_DATE_KHR. Flip request ignored until surface is recreated.");
swapchain_unavailable = true;
reinitialize_swapchain();
continue;
default:
vk::die_with_error(HERE, status);
}
}
//Confirm that the driver did not silently fail
verify(HERE), m_current_frame->present_image != UINT32_MAX;
//Blit contents to screen..
vk::image* image_to_flip = nullptr;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
const u32 absolute_address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
if (auto render_target_texture = m_rtts.get_color_surface_at(absolute_address))
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image_to_flip = render_target_texture;
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, absolute_address, buffer_width, buffer_height, buffer_pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image_to_flip = render_target_texture;
}
}
if (image_to_flip)
{
buffer_width = rsx::apply_resolution_scale(buffer_width, true);
buffer_height = rsx::apply_resolution_scale(buffer_height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
display_buffers[info.buffer].width, display_buffers[info.buffer].height, avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
buffer_width = render_target_texture->width();
buffer_height = render_target_texture->height();
}
}
}
else if (auto surface = m_texture_cache.find_texture_from_dimensions<true>(absolute_address, av_format, buffer_width, buffer_height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture();
}
if (!image_to_flip)
{
// Read from cell
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
const auto overlap = m_texture_cache.find_texture_from_range<true>(range, 0, lookup_mask);
for (const auto & section : overlap)
{
if (!section->is_synchronized())
{
section->copy_texture(*m_current_command_buffer, true);
}
}
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
// Submit for processing to lower hard fault penalty
flush_command_queue();
}
m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read);
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
}
}
VkImage target_image = m_swapchain->get_image(m_current_frame->present_image);
const auto present_layout = m_swapchain->get_optimal_present_layout();
const VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
VkImageLayout target_layout = present_layout;
VkRenderPass single_target_pass = VK_NULL_HANDLE;
vk::framebuffer_holder* direct_fbo = nullptr;
vk::viewable_image* calibration_src = nullptr;
if (image_to_flip)
{
if (aspect_ratio.x || aspect_ratio.y)
{
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (UNLIKELY(!g_cfg.video.full_rgb_range_output || !rsx::fcmp(avconfig->gamma, 1.f)))
{
calibration_src = dynamic_cast<vk::viewable_image*>(image_to_flip);
verify("Image handle not viewable!" HERE), calibration_src;
}
if (LIKELY(!calibration_src))
{
vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, image_to_flip->current_layout, target_layout,
{ 0, 0, static_cast<s32>(buffer_width), static_cast<s32>(buffer_height) }, aspect_ratio, 1, VK_IMAGE_ASPECT_COLOR_BIT, false);
}
else
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE;
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image);
direct_fbo->add_ref();
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_video_output_pass->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src, avconfig->gamma, !g_cfg.video.full_rgb_range_output, single_target_pass);
image_to_flip->pop_layout(*m_current_command_buffer);
direct_fbo->release();
}
}
else
{
//No draw call was issued!
//TODO: Upload raw bytes from cpu for rendering
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
const size_t sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
VkBufferImageCopy copy_info;
copy_info.bufferOffset = 0;
copy_info.bufferRowLength = 0;
copy_info.bufferImageHeight = 0;
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.baseArrayLayer = 0;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.mipLevel = 0;
copy_info.imageOffset.x = 0;
copy_info.imageOffset.y = 0;
copy_info.imageOffset.z = 0;
copy_info.imageExtent.width = buffer_width;
copy_info.imageExtent.height = buffer_height;
copy_info.imageExtent.depth = 1;
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info);
image_to_flip->pop_layout(*m_current_command_buffer);
flush_command_queue(true);
auto src = sshot_vkbuf.map(0, sshot_size);
std::vector<u8> sshot_frame(sshot_size);
memcpy(sshot_frame.data(), src, sshot_size);
sshot_vkbuf.unmap();
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible());
if (g_cfg.video.overlay || has_overlay)
{
if (target_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
{
// Change the image layout whilst setting up a dependency on waiting for the blit op to finish before we start writing
VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
barrier.oldLayout = target_layout;
barrier.image = target_image;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange = subresource_range;
vkCmdPipelineBarrier(*m_current_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
if (!direct_fbo)
{
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE;
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image);
}
direct_fbo->add_ref();
if (has_overlay)
{
// Lock to avoid modification during run-update chain
std::lock_guard lock(*m_overlay_manager);
for (const auto& view : m_overlay_manager->get_views())
{
m_ui_renderer->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, single_target_pass, m_texture_upload_buffer_ring_info, *view.get());
}
}
if (g_cfg.video.overlay)
{
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = static_cast<u32>(ceil(m_texture_cache.get_cache_miss_ratio() * 100));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
direct_fbo->release();
}
if (target_layout != present_layout)
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, present_layout, subresource_range);
}
queue_swap_request();
m_frame_stats.flip_time = m_profiler.duration();
m_frame->flip(m_context);
rsx::thread::flip(info);
}
void VKGSRender::renderctl(u32 request_code, void* args) void VKGSRender::renderctl(u32 request_code, void* args)
{ {
switch (request_code) switch (request_code)

View file

@ -50,21 +50,8 @@ namespace vk
using rsx::flags32_t; using rsx::flags32_t;
extern u64 get_system_time(); extern u64 get_system_time();
enum namespace vk
{ {
VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE = 0x1,
VK_HEAP_CHECK_VERTEX_STORAGE = 0x2,
VK_HEAP_CHECK_VERTEX_ENV_STORAGE = 0x4,
VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE = 0x8,
VK_HEAP_CHECK_TEXTURE_ENV_STORAGE = 0x10,
VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE = 0x20,
VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE = 0x40,
VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE = 0x80,
VK_HEAP_CHECK_MAX_ENUM = VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE,
VK_HEAP_CHECK_ALL = 0xFF,
};
struct command_buffer_chunk: public vk::command_buffer struct command_buffer_chunk: public vk::command_buffer
{ {
vk::fence* submit_fence = nullptr; vk::fence* submit_fence = nullptr;
@ -194,11 +181,6 @@ struct occlusion_data
} }
}; };
enum frame_context_state : u32
{
dirty = 1
};
struct frame_context_t struct frame_context_t
{ {
VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE; VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE;
@ -324,14 +306,45 @@ struct flush_request_task
} }
}; };
struct present_surface_info
{
u32 address;
u32 format;
u32 width;
u32 height;
u32 pitch;
};
}
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:
enum
{
VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE = 0x1,
VK_HEAP_CHECK_VERTEX_STORAGE = 0x2,
VK_HEAP_CHECK_VERTEX_ENV_STORAGE = 0x4,
VK_HEAP_CHECK_FRAGMENT_ENV_STORAGE = 0x8,
VK_HEAP_CHECK_TEXTURE_ENV_STORAGE = 0x10,
VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE = 0x20,
VK_HEAP_CHECK_TRANSFORM_CONSTANTS_STORAGE = 0x40,
VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE = 0x80,
VK_HEAP_CHECK_MAX_ENUM = VK_HEAP_CHECK_FRAGMENT_CONSTANTS_STORAGE,
VK_HEAP_CHECK_ALL = 0xFF,
};
enum frame_context_state : u32
{
dirty = 1
};
enum flush_queue_state : u32 enum flush_queue_state : u32
{ {
ok = 0, ok = 0,
deadlock = 1 deadlock = 1
}; };
class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private: private:
VKFragmentProgram m_fragment_prog; VKFragmentProgram m_fragment_prog;
VKVertexProgram m_vertex_prog; VKVertexProgram m_vertex_prog;
@ -382,15 +395,15 @@ private:
vk::occlusion_query_pool m_occlusion_query_pool; vk::occlusion_query_pool m_occlusion_query_pool;
bool m_occlusion_query_active = false; bool m_occlusion_query_active = false;
rsx::reports::occlusion_query_info *m_active_query_info = nullptr; rsx::reports::occlusion_query_info *m_active_query_info = nullptr;
std::vector<occlusion_data> m_occlusion_map; std::vector<vk::occlusion_data> m_occlusion_map;
shared_mutex m_secondary_cb_guard; shared_mutex m_secondary_cb_guard;
vk::command_pool m_secondary_command_buffer_pool; vk::command_pool m_secondary_command_buffer_pool;
vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations vk::command_buffer m_secondary_command_buffer; //command buffer used for setup operations
u32 m_current_cb_index = 0; u32 m_current_cb_index = 0;
std::array<command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list; std::array<vk::command_buffer_chunk, VK_MAX_ASYNC_CB_COUNT> m_primary_cb_list;
command_buffer_chunk* m_current_command_buffer = nullptr; vk::command_buffer_chunk* m_current_command_buffer = nullptr;
VkDescriptorSetLayout descriptor_layouts; VkDescriptorSetLayout descriptor_layouts;
VkPipelineLayout pipeline_layout; VkPipelineLayout pipeline_layout;
@ -421,13 +434,13 @@ private:
VkDescriptorBufferInfo m_fragment_constants_buffer_info; VkDescriptorBufferInfo m_fragment_constants_buffer_info;
VkDescriptorBufferInfo m_fragment_texture_params_buffer_info; VkDescriptorBufferInfo m_fragment_texture_params_buffer_info;
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage; std::array<vk::frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage //Temp frame context to use if the real frame queue is overburdened. Only used for storage
frame_context_t m_aux_frame_context; vk::frame_context_t m_aux_frame_context;
u32 m_current_queue_index = 0; u32 m_current_queue_index = 0;
frame_context_t* m_current_frame = nullptr; vk::frame_context_t* m_current_frame = nullptr;
std::deque<frame_context_t*> m_queued_frames; std::deque<vk::frame_context_t*> m_queued_frames;
VkViewport m_viewport{}; VkViewport m_viewport{};
VkRect2D m_scissor{}; VkRect2D m_scissor{};
@ -435,7 +448,7 @@ private:
std::vector<u8> m_draw_buffers; std::vector<u8> m_draw_buffers;
shared_mutex m_flush_queue_mutex; shared_mutex m_flush_queue_mutex;
flush_request_task m_flush_requests; vk::flush_request_task m_flush_requests;
// Offloader thread deadlock recovery // Offloader thread deadlock recovery
rsx::atomic_bitmask_t<flush_queue_state> m_queue_status; rsx::atomic_bitmask_t<flush_queue_state> m_queue_status;
@ -471,11 +484,13 @@ private:
void flush_command_queue(bool hard_sync = false); void flush_command_queue(bool hard_sync = false);
void queue_swap_request(); void queue_swap_request();
void frame_context_cleanup(frame_context_t *ctx, bool free_resources = false); void frame_context_cleanup(vk::frame_context_t *ctx, bool free_resources = false);
void advance_queued_frames(); void advance_queued_frames();
void present(frame_context_t *ctx); void present(vk::frame_context_t *ctx);
void reinitialize_swapchain(); void reinitialize_swapchain();
vk::image* get_present_source(vk::present_surface_info* info, const rsx::avconf* avconfig);
void begin_render_pass(); void begin_render_pass();
void close_render_pass(); void close_render_pass();

View file

@ -0,0 +1,692 @@
#include "stdafx.h"
#include "VKGSRender.h"
void VKGSRender::reinitialize_swapchain()
{
m_swapchain_dims.width = m_frame->client_width();
m_swapchain_dims.height = m_frame->client_height();
// Reject requests to acquire new swapchain if the window is minimized
// The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized
// However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state
if (m_swapchain_dims.width == 0 || m_swapchain_dims.height == 0)
{
swapchain_unavailable = true;
return;
}
// NOTE: This operation will create a hard sync point
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
m_current_command_buffer->pending = true;
m_current_command_buffer->reset();
for (auto &ctx : frame_context_storage)
{
if (ctx.present_image == UINT32_MAX)
continue;
// Release present image by presenting it
frame_context_cleanup(&ctx, true);
}
// Drain all the queues
vkDeviceWaitIdle(*m_device);
// Rebuild swapchain. Old swapchain destruction is handled by the init_swapchain call
if (!m_swapchain->init(m_swapchain_dims.width, m_swapchain_dims.height))
{
LOG_WARNING(RSX, "Swapchain initialization failed. Request ignored [%dx%d]", m_swapchain_dims.width, m_swapchain_dims.height);
swapchain_unavailable = true;
open_command_buffer();
return;
}
// Prepare new swapchain images for use
open_command_buffer();
for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
{
const auto target_layout = m_swapchain->get_optimal_present_layout();
const auto target_image = m_swapchain->get_image(i);
VkClearColorValue clear_color{};
VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, target_layout, range);
}
//Will have to block until rendering is completed
vk::fence resize_fence(*m_device);
//Flush the command buffer
close_and_submit_command_buffer(&resize_fence);
vk::wait_for_fence(&resize_fence);
m_current_command_buffer->reset();
open_command_buffer();
swapchain_unavailable = false;
should_reinitialize_swapchain = false;
}
void VKGSRender::present(vk::frame_context_t *ctx)
{
verify(HERE), ctx->present_image != UINT32_MAX;
// Partial CS flush
ctx->swap_command_buffer->flush();
if (!swapchain_unavailable)
{
switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image))
{
case VK_SUCCESS:
break;
case VK_SUBOPTIMAL_KHR:
should_reinitialize_swapchain = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
swapchain_unavailable = true;
break;
default:
vk::die_with_error(HERE, error);
}
}
// Presentation image released; reset value
ctx->present_image = UINT32_MAX;
}
void VKGSRender::advance_queued_frames()
{
// Check all other frames for completion and clear resources
check_present_status();
//m_rtts storage is double buffered and should be safe to tag on frame boundary
m_rtts.free_invalidated();
//texture cache is also double buffered to prevent use-after-free
m_texture_cache.on_frame_end();
m_samplers_dirty.store(true);
vk::remove_unused_framebuffers();
m_vertex_cache->purge();
m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(),
m_vertex_env_ring_info.get_current_put_pos_minus_one(),
m_fragment_env_ring_info.get_current_put_pos_minus_one(),
m_vertex_layout_ring_info.get_current_put_pos_minus_one(),
m_fragment_texture_params_ring_info.get_current_put_pos_minus_one(),
m_fragment_constants_ring_info.get_current_put_pos_minus_one(),
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one());
m_queued_frames.push_back(m_current_frame);
verify(HERE), m_queued_frames.size() <= VK_MAX_ASYNC_FRAMES;
m_current_queue_index = (m_current_queue_index + 1) % VK_MAX_ASYNC_FRAMES;
m_current_frame = &frame_context_storage[m_current_queue_index];
m_current_frame->flags |= frame_context_state::dirty;
vk::advance_frame_counter();
}
void VKGSRender::queue_swap_request()
{
verify(HERE), !m_current_frame->swap_command_buffer;
m_current_frame->swap_command_buffer = m_current_command_buffer;
if (m_swapchain->is_headless())
{
m_swapchain->end_frame(*m_current_command_buffer, m_current_frame->present_image);
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
}
else
{
close_and_submit_command_buffer(m_current_command_buffer->submit_fence,
m_current_frame->acquire_signal_semaphore,
m_current_frame->present_wait_semaphore,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
}
// Signal pending state as the command queue is now closed
m_current_frame->swap_command_buffer->pending = true;
// Set up a present request for this frame as well
present(m_current_frame);
// Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
m_current_command_buffer->reset();
// Set up new pointers for the next frame
advance_queued_frames();
open_command_buffer();
}
void VKGSRender::frame_context_cleanup(vk::frame_context_t *ctx, bool free_resources)
{
verify(HERE), ctx->swap_command_buffer;
if (ctx->swap_command_buffer->pending)
{
// Perform hard swap here
if (ctx->swap_command_buffer->wait(FRAME_PRESENT_TIMEOUT) != VK_SUCCESS)
{
// Lost surface/device, release swapchain
swapchain_unavailable = true;
}
free_resources = true;
}
if (free_resources)
{
if (g_cfg.video.overlay)
{
m_text_writer->reset_descriptors();
}
if (m_overlay_manager && m_overlay_manager->has_dirty())
{
m_overlay_manager->lock();
std::vector<u32> uids_to_dispose;
uids_to_dispose.reserve(m_overlay_manager->get_dirty().size());
for (const auto& view : m_overlay_manager->get_dirty())
{
m_ui_renderer->remove_temp_resources(view->uid);
uids_to_dispose.push_back(view->uid);
}
m_overlay_manager->unlock();
m_overlay_manager->dispose(uids_to_dispose);
}
vk::reset_global_resources();
m_attachment_clear_pass->free_resources();
m_depth_converter->free_resources();
m_ui_renderer->free_resources();
m_video_output_pass->free_resources();
ctx->buffer_views_to_clean.clear();
if (ctx->last_frame_sync_time > m_last_heap_sync_time)
{
m_last_heap_sync_time = ctx->last_frame_sync_time;
//Heap cleanup; deallocates memory consumed by the frame if it is still held
m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr;
m_vertex_env_ring_info.m_get_pos = ctx->vtx_env_heap_ptr;
m_fragment_env_ring_info.m_get_pos = ctx->frag_env_heap_ptr;
m_fragment_constants_ring_info.m_get_pos = ctx->frag_const_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtx_const_heap_ptr;
m_vertex_layout_ring_info.m_get_pos = ctx->vtx_layout_heap_ptr;
m_fragment_texture_params_ring_info.m_get_pos = ctx->frag_texparam_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_attrib_ring_info.notify();
m_vertex_env_ring_info.notify();
m_fragment_env_ring_info.notify();
m_fragment_constants_ring_info.notify();
m_transform_constants_ring_info.notify();
m_vertex_layout_ring_info.notify();
m_fragment_texture_params_ring_info.notify();
m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify();
}
}
ctx->swap_command_buffer = nullptr;
// Remove from queued list
while (!m_queued_frames.empty())
{
auto frame = m_queued_frames.front();
m_queued_frames.pop_front();
if (frame == ctx)
{
break;
}
}
vk::advance_completed_frame_counter();
}
vk::image* VKGSRender::get_present_source(vk::present_surface_info* info, const rsx::avconf* avconfig)
{
vk::image* image_to_flip = nullptr;
if (auto render_target_texture = m_rtts.get_color_surface_at(info->address))
{
if (render_target_texture->last_use_tag == m_rtts.write_tag)
{
image_to_flip = render_target_texture;
}
else
{
const auto overlap_info = m_rtts.get_merged_texture_memory_region(*m_current_command_buffer, info->address, info->width, info->height, info->pitch, render_target_texture->get_bpp(), rsx::surface_access::read);
if (!overlap_info.empty() && overlap_info.back().surface == render_target_texture)
{
// Confirmed to be the newest data source in that range
image_to_flip = render_target_texture;
}
}
if (image_to_flip)
{
const auto buffer_width = rsx::apply_resolution_scale(info->width, true);
const auto buffer_height = rsx::apply_resolution_scale(info->height, true);
if (buffer_width > render_target_texture->width() ||
buffer_height > render_target_texture->height())
{
// TODO: Should emit only once to avoid flooding the log file
// TODO: Take AA scaling into account
LOG_WARNING(RSX, "Selected output image does not satisfy the video configuration. Display buffer resolution=%dx%d, avconf resolution=%dx%d, surface=%dx%d",
info->width, info->height,
avconfig->state * avconfig->resolution_x, avconfig->state * avconfig->resolution_y,
render_target_texture->get_surface_width(rsx::surface_metrics::pixels), render_target_texture->get_surface_height(rsx::surface_metrics::pixels));
info->width = render_target_texture->width();
info->height = render_target_texture->height();
}
else
{
info->width = buffer_width;
info->height = buffer_height;
}
}
}
else if (auto surface = m_texture_cache.find_texture_from_dimensions<true>(info->address, info->format, info->width, info->height))
{
//Hack - this should be the first location to check for output
//The render might have been done offscreen or in software and a blit used to display
image_to_flip = surface->get_raw_texture();
}
if (!image_to_flip)
{
// Read from cell
const auto range = utils::address_range::start_length(info->address, info->pitch * info->height);
const u32 lookup_mask = rsx::texture_upload_context::blit_engine_dst | rsx::texture_upload_context::framebuffer_storage;
const auto overlap = m_texture_cache.find_texture_from_range<true>(range, 0, lookup_mask);
for (const auto & section : overlap)
{
if (!section->is_synchronized())
{
section->copy_texture(*m_current_command_buffer, true);
}
}
if (m_current_command_buffer->flags & vk::command_buffer::cb_has_dma_transfer)
{
// Submit for processing to lower hard fault penalty
flush_command_queue();
}
m_texture_cache.invalidate_range(*m_current_command_buffer, range, rsx::invalidation_cause::read);
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, info->address, info->width, info->height);
}
return image_to_flip;
}
void VKGSRender::flip(const rsx::display_flip_info_t& info)
{
// Check swapchain condition/status
if (!m_swapchain->supports_automatic_wm_reports())
{
if (m_swapchain_dims.width != m_frame->client_width() ||
m_swapchain_dims.height != m_frame->client_height())
{
swapchain_unavailable = true;
}
}
if (swapchain_unavailable || should_reinitialize_swapchain)
{
reinitialize_swapchain();
}
m_profiler.start();
if (m_current_frame == &m_aux_frame_context)
{
m_current_frame = &frame_context_storage[m_current_queue_index];
if (m_current_frame->swap_command_buffer)
{
// Its possible this flip request is triggered by overlays and the flip queue is in undefined state
frame_context_cleanup(m_current_frame, true);
}
// Swap aux storage and current frame; aux storage should always be ready for use at all times
m_current_frame->swap_storage(m_aux_frame_context);
m_current_frame->grab_resources(m_aux_frame_context);
}
else if (m_current_frame->swap_command_buffer)
{
if (info.stats.draw_calls > 0)
{
// This can be 'legal' if the window was being resized and no polling happened because of swapchain_unavailable flag
LOG_ERROR(RSX, "Possible data corruption on frame context storage detected");
}
// There were no draws and back-to-back flips happened
frame_context_cleanup(m_current_frame, true);
}
if (info.skip_frame || swapchain_unavailable)
{
if (!info.skip_frame)
{
verify(HERE), swapchain_unavailable;
// Perform a mini-flip here without invoking present code
m_current_frame->swap_command_buffer = m_current_command_buffer;
flush_command_queue(true);
vk::advance_frame_counter();
frame_context_cleanup(m_current_frame, true);
}
m_frame->flip(m_context);
rsx::thread::flip(info);
return;
}
u32 buffer_width = display_buffers[info.buffer].width;
u32 buffer_height = display_buffers[info.buffer].height;
u32 buffer_pitch = display_buffers[info.buffer].pitch;
u32 av_format;
const auto avconfig = g_fxo->get<rsx::avconf>();
if (avconfig->state)
{
av_format = avconfig->get_compatible_gcm_format();
if (!buffer_pitch)
buffer_pitch = buffer_width * avconfig->get_bpp();
buffer_width = std::min(buffer_width, avconfig->resolution_x);
buffer_height = std::min(buffer_height, avconfig->resolution_y);
}
else
{
av_format = CELL_GCM_TEXTURE_A8R8G8B8;
if (!buffer_pitch)
buffer_pitch = buffer_width * 4;
}
coordi aspect_ratio;
sizei csize = m_swapchain_dims;
sizei new_size = csize;
if (!g_cfg.video.stretch_to_display_area)
{
const double aq = 1. * buffer_width / buffer_height;
const double rq = 1. * new_size.width / new_size.height;
const double q = aq / rq;
if (q > 1.0)
{
new_size.height = static_cast<int>(new_size.height / q);
aspect_ratio.y = (csize.height - new_size.height) / 2;
}
else if (q < 1.0)
{
new_size.width = static_cast<int>(new_size.width * q);
aspect_ratio.x = (csize.width - new_size.width) / 2;
}
}
aspect_ratio.size = new_size;
//Prepare surface for new frame. Set no timeout here so that we wait for the next image if need be
verify(HERE), m_current_frame->present_image == UINT32_MAX;
verify(HERE), m_current_frame->swap_command_buffer == nullptr;
u64 timeout = m_swapchain->get_swap_image_count() <= VK_MAX_ASYNC_FRAMES? 0ull: 100000000ull;
while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image))
{
switch (status)
{
case VK_TIMEOUT:
case VK_NOT_READY:
{
//In some cases, after a fullscreen switch, the driver only allows N-1 images to be acquirable, where N = number of available swap images.
//This means that any acquired images have to be released
//before acquireNextImage can return successfully. This is despite the driver reporting 2 swap chain images available
//This makes fullscreen performance slower than windowed performance as throughput is lowered due to losing one presentable image
//Found on AMD Crimson 17.7.2
//Whatever returned from status, this is now a spin
timeout = 0ull;
check_present_status();
continue;
}
case VK_SUBOPTIMAL_KHR:
should_reinitialize_swapchain = true;
break;
case VK_ERROR_OUT_OF_DATE_KHR:
LOG_WARNING(RSX, "vkAcquireNextImageKHR failed with VK_ERROR_OUT_OF_DATE_KHR. Flip request ignored until surface is recreated.");
swapchain_unavailable = true;
reinitialize_swapchain();
continue;
default:
vk::die_with_error(HERE, status);
}
}
//Confirm that the driver did not silently fail
verify(HERE), m_current_frame->present_image != UINT32_MAX;
//Blit contents to screen..
vk::image* image_to_flip = nullptr;
if (info.buffer < display_buffers_count && buffer_width && buffer_height)
{
vk::present_surface_info present_info;
present_info.width = buffer_width;
present_info.height = buffer_height;
present_info.pitch = buffer_pitch;
present_info.format = av_format;
present_info.address = rsx::get_address(display_buffers[info.buffer].offset, CELL_GCM_LOCATION_LOCAL);
image_to_flip = get_present_source(&present_info, avconfig);
buffer_width = present_info.width;
buffer_height = present_info.height;
}
VkImage target_image = m_swapchain->get_image(m_current_frame->present_image);
const auto present_layout = m_swapchain->get_optimal_present_layout();
const VkImageSubresourceRange subresource_range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
VkImageLayout target_layout = present_layout;
VkRenderPass single_target_pass = VK_NULL_HANDLE;
vk::framebuffer_holder* direct_fbo = nullptr;
vk::viewable_image* calibration_src = nullptr;
if (image_to_flip)
{
if (aspect_ratio.x || aspect_ratio.y)
{
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (UNLIKELY(!g_cfg.video.full_rgb_range_output || !rsx::fcmp(avconfig->gamma, 1.f)))
{
calibration_src = dynamic_cast<vk::viewable_image*>(image_to_flip);
verify("Image handle not viewable!" HERE), calibration_src;
}
if (LIKELY(!calibration_src))
{
vk::copy_scaled_image(*m_current_command_buffer, image_to_flip->value, target_image, image_to_flip->current_layout, target_layout,
{ 0, 0, static_cast<s32>(buffer_width), static_cast<s32>(buffer_height) }, aspect_ratio, 1, VK_IMAGE_ASPECT_COLOR_BIT, false);
}
else
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, subresource_range);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE;
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image);
direct_fbo->add_ref();
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_video_output_pass->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, calibration_src, avconfig->gamma, !g_cfg.video.full_rgb_range_output, single_target_pass);
image_to_flip->pop_layout(*m_current_command_buffer);
direct_fbo->release();
}
}
else
{
//No draw call was issued!
//TODO: Upload raw bytes from cpu for rendering
VkClearColorValue clear_black {};
vk::change_image_layout(*m_current_command_buffer, target_image, present_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, subresource_range);
vkCmdClearColorImage(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_black, 1, &subresource_range);
target_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
if (m_frame->screenshot_toggle == true)
{
m_frame->screenshot_toggle = false;
const size_t sshot_size = buffer_height * buffer_width * 4;
vk::buffer sshot_vkbuf(*m_device, align(sshot_size, 0x100000), m_device->get_memory_mapping().host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0);
VkBufferImageCopy copy_info;
copy_info.bufferOffset = 0;
copy_info.bufferRowLength = 0;
copy_info.bufferImageHeight = 0;
copy_info.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
copy_info.imageSubresource.baseArrayLayer = 0;
copy_info.imageSubresource.layerCount = 1;
copy_info.imageSubresource.mipLevel = 0;
copy_info.imageOffset.x = 0;
copy_info.imageOffset.y = 0;
copy_info.imageOffset.z = 0;
copy_info.imageExtent.width = buffer_width;
copy_info.imageExtent.height = buffer_height;
copy_info.imageExtent.depth = 1;
image_to_flip->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
vk::copy_image_to_buffer(*m_current_command_buffer, image_to_flip, &sshot_vkbuf, copy_info);
image_to_flip->pop_layout(*m_current_command_buffer);
flush_command_queue(true);
auto src = sshot_vkbuf.map(0, sshot_size);
std::vector<u8> sshot_frame(sshot_size);
memcpy(sshot_frame.data(), src, sshot_size);
sshot_vkbuf.unmap();
m_frame->take_screenshot(std::move(sshot_frame), buffer_width, buffer_height);
}
const bool has_overlay = (m_overlay_manager && m_overlay_manager->has_visible());
if (g_cfg.video.overlay || has_overlay)
{
if (target_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL)
{
// Change the image layout whilst setting up a dependency on waiting for the blit op to finish before we start writing
VkImageMemoryBarrier barrier = {};
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
barrier.oldLayout = target_layout;
barrier.image = target_image;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.subresourceRange = subresource_range;
vkCmdPipelineBarrier(*m_current_command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
target_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
if (!direct_fbo)
{
const auto key = vk::get_renderpass_key(m_swapchain->get_surface_format());
single_target_pass = vk::get_renderpass(*m_device, key);
verify("Usupported renderpass configuration" HERE), single_target_pass != VK_NULL_HANDLE;
direct_fbo = vk::get_framebuffer(*m_device, m_swapchain_dims.width, m_swapchain_dims.height, single_target_pass, m_swapchain->get_surface_format(), target_image);
}
direct_fbo->add_ref();
if (has_overlay)
{
// Lock to avoid modification during run-update chain
std::lock_guard lock(*m_overlay_manager);
for (const auto& view : m_overlay_manager->get_views())
{
m_ui_renderer->run(*m_current_command_buffer, areau(aspect_ratio), direct_fbo, single_target_pass, m_texture_upload_buffer_ring_info, *view.get());
}
}
if (g_cfg.video.overlay)
{
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), fmt::format("RSX Load: %3d%%", get_load()));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), fmt::format("draw calls: %17d", info.stats.draw_calls));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call setup: %12dus", info.stats.setup_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 54, direct_fbo->width(), direct_fbo->height(), fmt::format("vertex upload time: %9dus", info.stats.vertex_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), fmt::format("texture upload time: %8dus", info.stats.textures_upload_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), fmt::format("draw call execution: %8dus", info.stats.draw_exec_time));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), fmt::format("submit and flip: %12dus", info.stats.flip_time));
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
const auto num_speculate = m_texture_cache.get_num_cache_speculative_writes();
const auto num_misses = m_texture_cache.get_num_cache_misses();
const auto num_unavoidable = m_texture_cache.get_num_unavoidable_hard_faults();
const auto cache_miss_ratio = static_cast<u32>(ceil(m_texture_cache.get_cache_miss_ratio() * 100));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), fmt::format("Unreleased textures: %8d", num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), fmt::format("Texture cache memory: %7dM", texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Temporary texture memory: %3dM", tmp_texture_memory_size));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 198, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %13d = %2d (%3d%%) hard faults, %2d unavoidable, %2d misprediction(s), %2d speculation(s)", num_flushes, num_misses, cache_miss_ratio, num_unavoidable, num_mispredict, num_speculate));
}
direct_fbo->release();
}
if (target_layout != present_layout)
{
vk::change_image_layout(*m_current_command_buffer, target_image, target_layout, present_layout, subresource_range);
}
queue_swap_request();
m_frame_stats.flip_time = m_profiler.duration();
m_frame->flip(m_context);
rsx::thread::flip(info);
}

View file

@ -93,6 +93,7 @@
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" /> <ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" /> <ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" /> <ClCompile Include="Emu\RSX\GL\GLHelpers.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" /> <ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" /> <ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\GLTexture.cpp" /> <ClCompile Include="Emu\RSX\GL\GLTexture.cpp" />

View file

@ -8,6 +8,7 @@
<ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" /> <ClCompile Include="Emu\RSX\GL\GLGSRender.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" /> <ClCompile Include="Emu\RSX\GL\GLVertexProgram.cpp" />
<ClCompile Include="Emu\RSX\GL\OpenGL.cpp" /> <ClCompile Include="Emu\RSX\GL\OpenGL.cpp" />
<ClCompile Include="Emu\RSX\GL\GLPresent.cpp" />
<ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" /> <ClCompile Include="Emu\RSX\GL\GLRenderTargets.cpp" />
<ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" /> <ClCompile Include="Emu\RSX\GL\GLVertexBuffers.cpp" />
</ItemGroup> </ItemGroup>

View file

@ -52,6 +52,7 @@
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" /> <ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
<ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" /> <ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" />
<ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" /> <ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKPresent.cpp" />
<ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" /> <ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" />
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" /> <ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" />
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" /> <ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" />

View file

@ -8,6 +8,7 @@
<ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" /> <ClCompile Include="Emu\RSX\VK\VKFramebuffer.cpp" />
<ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" /> <ClCompile Include="Emu\RSX\VK\VKGSRender.cpp" />
<ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" /> <ClCompile Include="Emu\RSX\VK\VKHelpers.cpp" />
<ClCompile Include="Emu\RSX\VK\VKPresent.cpp" />
<ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" /> <ClCompile Include="Emu\RSX\VK\VKProgramPipeline.cpp" />
<ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" /> <ClCompile Include="Emu\RSX\VK\VKRenderPass.cpp" />
<ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" /> <ClCompile Include="Emu\RSX\VK\VKResolveHelper.cpp" />