vk/rsx: Bug fixes and improvements

- Improvements to framebuffer usage; Avoid creating new resources every frame
- Handle null fragment program properly
- Collect vertex upload statistics

- vk: Pre-initialize 'unused' varying registers in the vertex shader in case they get matched with a fragment shader that consumes them
 -- Fixes a crash about fog_c not being declared

gl/dx12/vk: Handle null fragment program

- cleanup - use yield semantics instead of sleep(0), as yield is more cross-platform
 -- sleep(0) is a Windows-specific scheduler hint
This commit is contained in:
kd-11 2017-07-13 00:49:50 +03:00
parent 72e13ddeb2
commit 05ffb50037
11 changed files with 317 additions and 97 deletions

View file

@ -324,6 +324,17 @@ void D3D12GSRender::end()
{ {
std::chrono::time_point<steady_clock> start_duration = steady_clock::now(); std::chrono::time_point<steady_clock> start_duration = steady_clock::now();
std::chrono::time_point<steady_clock> program_load_start = steady_clock::now();
load_program();
std::chrono::time_point<steady_clock> program_load_end = steady_clock::now();
m_timers.program_load_duration += std::chrono::duration_cast<std::chrono::microseconds>(program_load_end - program_load_start).count();
if (!m_fragment_program.valid)
{
rsx::thread::end();
return;
}
std::chrono::time_point<steady_clock> rtt_duration_start = steady_clock::now(); std::chrono::time_point<steady_clock> rtt_duration_start = steady_clock::now();
prepare_render_targets(get_current_resource_storage().command_list.Get()); prepare_render_targets(get_current_resource_storage().command_list.Get());
@ -344,11 +355,6 @@ void D3D12GSRender::end()
std::chrono::time_point<steady_clock> vertex_index_duration_end = steady_clock::now(); std::chrono::time_point<steady_clock> vertex_index_duration_end = steady_clock::now();
m_timers.vertex_index_duration += std::chrono::duration_cast<std::chrono::microseconds>(vertex_index_duration_end - vertex_index_duration_start).count(); m_timers.vertex_index_duration += std::chrono::duration_cast<std::chrono::microseconds>(vertex_index_duration_end - vertex_index_duration_start).count();
std::chrono::time_point<steady_clock> program_load_start = steady_clock::now();
load_program();
std::chrono::time_point<steady_clock> program_load_end = steady_clock::now();
m_timers.program_load_duration += std::chrono::duration_cast<std::chrono::microseconds>(program_load_end - program_load_start).count();
get_current_resource_storage().command_list->SetGraphicsRootSignature(m_shared_root_signature.Get()); get_current_resource_storage().command_list->SetGraphicsRootSignature(m_shared_root_signature.Get());
get_current_resource_storage().command_list->OMSetStencilRef(rsx::method_registers.stencil_func_ref()); get_current_resource_storage().command_list->OMSetStencilRef(rsx::method_registers.stencil_func_ref());

View file

@ -56,6 +56,9 @@ void D3D12GSRender::load_program()
m_vertex_program = get_current_vertex_program(); m_vertex_program = get_current_vertex_program();
m_fragment_program = get_current_fragment_program(rtt_lookup_func); m_fragment_program = get_current_fragment_program(rtt_lookup_func);
if (!m_fragment_program.valid)
return;
D3D12PipelineProperties prop = {}; D3D12PipelineProperties prop = {};
prop.Topology = get_primitive_topology_type(rsx::method_registers.current_draw_clause.primitive); prop.Topology = get_primitive_topology_type(rsx::method_registers.current_draw_clause.primitive);

View file

@ -322,17 +322,15 @@ namespace
void GLGSRender::end() void GLGSRender::end()
{ {
if (skip_frame || !framebuffer_status_valid) std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
if (skip_frame || !framebuffer_status_valid || !load_program())
{ {
rsx::thread::end(); rsx::thread::end();
return; return;
} }
std::chrono::time_point<steady_clock> program_start = steady_clock::now();
//Load program here since it is dependent on vertex state
load_program();
std::chrono::time_point<steady_clock> program_stop = steady_clock::now(); std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count(); m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
@ -841,8 +839,10 @@ bool GLGSRender::load_program()
return std::make_tuple(true, surface->get_native_pitch()); return std::make_tuple(true, surface->get_native_pitch());
}; };
RSXVertexProgram vertex_program = get_current_vertex_program();
RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func); RSXFragmentProgram fragment_program = get_current_fragment_program(rtt_lookup_func);
if (!fragment_program.valid) return false;
RSXVertexProgram vertex_program = get_current_vertex_program();
u32 unnormalized_rtts = 0; u32 unnormalized_rtts = 0;

View file

@ -239,6 +239,8 @@ struct RSXFragmentProgram
u8 textures_alpha_kill[16]; u8 textures_alpha_kill[16];
u32 textures_zfunc[16]; u32 textures_zfunc[16];
bool valid;
rsx::texture_dimension_extended get_texture_dimension(u8 id) const rsx::texture_dimension_extended get_texture_dimension(u8 id) const
{ {
return (rsx::texture_dimension_extended)((texture_dimensions >> (id * 2)) & 0x3); return (rsx::texture_dimension_extended)((texture_dimensions >> (id * 2)) & 0x3);
@ -263,6 +265,7 @@ struct RSXFragmentProgram
, ctrl(0) , ctrl(0)
, unnormalized_coords(0) , unnormalized_coords(0)
, texture_dimensions(0) , texture_dimensions(0)
, valid(false)
{ {
} }
}; };

View file

@ -974,9 +974,17 @@ namespace rsx
RSXFragmentProgram thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info) const RSXFragmentProgram thread::get_current_fragment_program(std::function<std::tuple<bool, u16>(u32, fragment_texture&, bool)> get_surface_info) const
{ {
RSXFragmentProgram result = {}; RSXFragmentProgram result = {};
u32 shader_program = rsx::method_registers.shader_program_address();
result.offset = shader_program & ~0x3; const u32 shader_program = rsx::method_registers.shader_program_address();
result.addr = vm::base(rsx::get_address(result.offset, (shader_program & 0x3) - 1)); if (shader_program == 0)
return result;
const u32 program_location = (shader_program & 0x3) - 1;
const u32 program_offset = (shader_program & ~0x3);
result.offset = program_offset;
result.addr = vm::base(rsx::get_address(program_offset, program_location));
result.valid = true;
result.ctrl = rsx::method_registers.shader_control(); result.ctrl = rsx::method_registers.shader_control();
result.unnormalized_coords = 0; result.unnormalized_coords = 0;
result.front_back_color_enabled = !rsx::method_registers.two_side_light_en(); result.front_back_color_enabled = !rsx::method_registers.two_side_light_en();
@ -1174,26 +1182,26 @@ namespace rsx
if (packet.post_upload_func) if (packet.post_upload_func)
packet.post_upload_func(packet.dst_span.data(), packet.type, (u8)packet.vector_width, task.vertex_count); packet.post_upload_func(packet.dst_span.data(), packet.type, (u8)packet.vector_width, task.vertex_count);
_mm_sfence();
task.remaining_packets--; task.remaining_packets--;
current_job += step; current_job += step;
_mm_sfence();
} }
_mm_mfence(); _mm_mfence();
while (task.remaining_packets > 0 && !Emu.IsStopped()) while (task.remaining_packets > 0 && !Emu.IsStopped())
{ {
std::this_thread::yield();
_mm_lfence(); _mm_lfence();
std::this_thread::sleep_for(0us);
} }
_mm_sfence();
task.ready_threads++; task.ready_threads++;
_mm_sfence();
} }
else else
std::this_thread::sleep_for(0us); {
//thread_ctrl::wait(); std::this_thread::yield();
//busy_wait(); }
} }
}); });
} }
@ -1201,8 +1209,7 @@ namespace rsx
while (m_vertex_streaming_task.ready_threads != 0 && !Emu.IsStopped()) while (m_vertex_streaming_task.ready_threads != 0 && !Emu.IsStopped())
{ {
_mm_lfence(); _mm_pause();
busy_wait();
} }
m_vertex_streaming_task.vertex_count = vertex_count; m_vertex_streaming_task.vertex_count = vertex_count;
@ -1214,8 +1221,7 @@ namespace rsx
{ {
while (m_vertex_streaming_task.remaining_packets > 0 && !Emu.IsStopped()) while (m_vertex_streaming_task.remaining_packets > 0 && !Emu.IsStopped())
{ {
_mm_lfence(); _mm_pause();
busy_wait();
} }
m_vertex_streaming_task.packets.resize(0); m_vertex_streaming_task.packets.resize(0);

View file

@ -677,6 +677,7 @@ VKGSRender::~VKGSRender()
m_buffer_view_to_clean.clear(); m_buffer_view_to_clean.clear();
m_sampler_to_clean.clear(); m_sampler_to_clean.clear();
m_framebuffer_to_clean.clear(); m_framebuffer_to_clean.clear();
m_draw_fbo.reset();
//Render passes //Render passes
for (auto &render_pass : m_render_passes) for (auto &render_pass : m_render_passes)
@ -880,11 +881,11 @@ void VKGSRender::begin_render_pass()
VkRenderPassBeginInfo rp_begin = {}; VkRenderPassBeginInfo rp_begin = {};
rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
rp_begin.renderPass = current_render_pass; rp_begin.renderPass = current_render_pass;
rp_begin.framebuffer = m_framebuffer_to_clean.back()->value; rp_begin.framebuffer = m_draw_fbo->value;
rp_begin.renderArea.offset.x = 0; rp_begin.renderArea.offset.x = 0;
rp_begin.renderArea.offset.y = 0; rp_begin.renderArea.offset.y = 0;
rp_begin.renderArea.extent.width = m_framebuffer_to_clean.back()->width(); rp_begin.renderArea.extent.width = m_draw_fbo->width();
rp_begin.renderArea.extent.height = m_framebuffer_to_clean.back()->height(); rp_begin.renderArea.extent.height = m_draw_fbo->height();
vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); vkCmdBeginRenderPass(*m_current_command_buffer, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
render_pass_open = true; render_pass_open = true;
@ -941,10 +942,15 @@ void VKGSRender::end()
} }
//Load program here since it is dependent on vertex state //Load program here since it is dependent on vertex state
load_program(is_instanced); if (!load_program(is_instanced))
{
LOG_ERROR(RSX, "No valid program bound to pipeline. Skipping draw");
rsx::thread::end();
return;
}
std::chrono::time_point<steady_clock> program_stop = steady_clock::now(); std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count(); //m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
if (is_instanced) if (is_instanced)
{ {
@ -1123,7 +1129,7 @@ void VKGSRender::end()
depth_clear_value.depthStencil.depth = 1.f; depth_clear_value.depthStencil.depth = 1.f;
depth_clear_value.depthStencil.stencil = 255; depth_clear_value.depthStencil.stencil = 255;
VkClearRect clear_rect = { 0, 0, m_framebuffer_to_clean.back()->width(), m_framebuffer_to_clean.back()->height(), 0, 1 }; VkClearRect clear_rect = { 0, 0, m_draw_fbo->width(), m_draw_fbo->height(), 0, 1 };
VkClearAttachment clear_desc = { ds->attachment_aspect_flag, 0, depth_clear_value }; VkClearAttachment clear_desc = { ds->attachment_aspect_flag, 0, depth_clear_value };
vkCmdClearAttachments(*m_current_command_buffer, 1, &clear_desc, 1, &clear_rect); vkCmdClearAttachments(*m_current_command_buffer, 1, &clear_desc, 1, &clear_rect);
@ -1133,18 +1139,15 @@ void VKGSRender::end()
std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<2>(upload_info); std::optional<std::tuple<VkDeviceSize, VkIndexType> > index_info = std::get<2>(upload_info);
if (m_attrib_ring_info.mapped)
{
wait_for_vertex_upload_task();
m_attrib_ring_info.unmap();
}
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now(); std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count(); m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - textures_end).count();
if (!index_info) if (!index_info)
{ {
vkCmdDraw(*m_current_command_buffer, std::get<1>(upload_info), 1, 0, 0); const auto vertex_count = std::get<1>(upload_info);
vkCmdDraw(*m_current_command_buffer, vertex_count, 1, 0, 0);
m_last_vertex_count = vertex_count;
m_last_draw_indexed = false; m_last_draw_indexed = false;
} }
else else
@ -1175,6 +1178,22 @@ void VKGSRender::end()
copy_render_targets_to_dma_location(); copy_render_targets_to_dma_location();
m_draw_calls++; m_draw_calls++;
if (g_cfg.video.overlay)
{
if (m_last_vertex_count < 1024)
m_uploads_small++;
else if (m_last_vertex_count < 2048)
m_uploads_1k++;
else if (m_last_vertex_count < 4096)
m_uploads_2k++;
else if (m_last_vertex_count < 8192)
m_uploads_4k++;
else if (m_last_vertex_count < 16384)
m_uploads_8k++;
else
m_uploads_16k++;
}
rsx::thread::end(); rsx::thread::end();
} }
@ -1260,8 +1279,8 @@ void VKGSRender::clear_surface(u32 mask)
u16 scissor_y = rsx::method_registers.scissor_origin_y(); u16 scissor_y = rsx::method_registers.scissor_origin_y();
u16 scissor_h = rsx::method_registers.scissor_height(); u16 scissor_h = rsx::method_registers.scissor_height();
const u32 fb_width = m_framebuffer_to_clean.back()->width(); const u32 fb_width = m_draw_fbo->width();
const u32 fb_height = m_framebuffer_to_clean.back()->height(); const u32 fb_height = m_draw_fbo->height();
//clip region //clip region
std::tie(scissor_x, scissor_y, scissor_w, scissor_h) = rsx::clip_region<u16>(fb_width, fb_height, scissor_x, scissor_y, scissor_w, scissor_h, true); std::tie(scissor_x, scissor_y, scissor_w, scissor_h) = rsx::clip_region<u16>(fb_width, fb_height, scissor_x, scissor_y, scissor_w, scissor_h, true);
@ -1392,6 +1411,12 @@ void VKGSRender::copy_render_targets_to_dma_location()
void VKGSRender::flush_command_queue(bool hard_sync) void VKGSRender::flush_command_queue(bool hard_sync)
{ {
if (m_attrib_ring_info.mapped)
{
wait_for_vertex_upload_task();
m_attrib_ring_info.unmap();
}
close_render_pass(); close_render_pass();
close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence); close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence);
@ -1480,7 +1505,13 @@ void VKGSRender::process_swap_request()
m_buffer_view_to_clean.clear(); m_buffer_view_to_clean.clear();
m_sampler_to_clean.clear(); m_sampler_to_clean.clear();
m_framebuffer_to_clean.clear();
m_framebuffer_to_clean.remove_if([](std::unique_ptr<vk::framebuffer_holder>& fbo)
{
if (fbo->deref_count >= 2) return true;
fbo->deref_count++;
return false;
});
if (g_cfg.video.overlay) if (g_cfg.video.overlay)
{ {
@ -1545,8 +1576,10 @@ bool VKGSRender::load_program(bool fast_update)
return std::make_tuple(true, surface->native_pitch); return std::make_tuple(true, surface->native_pitch);
}; };
vertex_program = get_current_vertex_program();
fragment_program = get_current_fragment_program(rtt_lookup_func); fragment_program = get_current_fragment_program(rtt_lookup_func);
if (!fragment_program.valid) return false;
vertex_program = get_current_vertex_program();
vk::pipeline_props properties = {}; vk::pipeline_props properties = {};
@ -1864,6 +1897,35 @@ void VKGSRender::prepare_rtts()
const u32 surface_pitchs[] = { rsx::method_registers.surface_a_pitch(), rsx::method_registers.surface_b_pitch(), const u32 surface_pitchs[] = { rsx::method_registers.surface_a_pitch(), rsx::method_registers.surface_b_pitch(),
rsx::method_registers.surface_c_pitch(), rsx::method_registers.surface_d_pitch() }; rsx::method_registers.surface_c_pitch(), rsx::method_registers.surface_d_pitch() };
if (m_draw_fbo)
{
const u32 fb_width = m_draw_fbo->width();
const u32 fb_height = m_draw_fbo->height();
bool really_changed = false;
if (fb_width == clip_width && fb_height == clip_height)
{
for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i)
{
if (m_surface_info[i].address != surface_addresses[i])
{
really_changed = true;
break;
}
}
if (!really_changed)
{
if (zeta_address == m_depth_surface_info.address)
{
//Nothing has changed, we're still using the same framebuffer
return;
}
}
}
}
m_rtts.prepare_render_target(&*m_current_command_buffer, m_rtts.prepare_render_target(&*m_current_command_buffer,
rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(), rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(),
clip_width, clip_height, clip_width, clip_height,
@ -1887,20 +1949,16 @@ void VKGSRender::prepare_rtts()
//Bind created rtts as current fbo... //Bind created rtts as current fbo...
std::vector<u8> draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target()); std::vector<u8> draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target());
std::vector<std::unique_ptr<vk::image_view>> fbo_images;
//Search old framebuffers for this same configuration
bool framebuffer_found = false;
std::vector<vk::image*> bound_images;
bound_images.reserve(5);
for (u8 index : draw_buffers) for (u8 index : draw_buffers)
{ {
vk::image *raw = std::get<1>(m_rtts.m_bound_render_targets[index]); bound_images.push_back(std::get<1>(m_rtts.m_bound_render_targets[index]));
VkImageSubresourceRange subres = {};
subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subres.baseArrayLayer = 0;
subres.baseMipLevel = 0;
subres.layerCount = 1;
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
m_surface_info[index].address = surface_addresses[index]; m_surface_info[index].address = surface_addresses[index];
m_surface_info[index].pitch = surface_pitchs[index]; m_surface_info[index].pitch = surface_pitchs[index];
@ -1913,20 +1971,9 @@ void VKGSRender::prepare_rtts()
} }
} }
m_draw_buffers_count = static_cast<u32>(fbo_images.size()); if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0)
if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr)
{ {
vk::image *raw = (std::get<1>(m_rtts.m_bound_depth_stencil)); bound_images.push_back(std::get<1>(m_rtts.m_bound_depth_stencil));
VkImageSubresourceRange subres = {};
subres.aspectMask = (rsx::method_registers.surface_depth_fmt() == rsx::surface_depth_format::z24s8) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT;
subres.baseArrayLayer = 0;
subres.baseMipLevel = 0;
subres.layerCount = 1;
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
m_depth_surface_info.address = zeta_address; m_depth_surface_info.address = zeta_address;
m_depth_surface_info.pitch = rsx::method_registers.surface_z_pitch(); m_depth_surface_info.pitch = rsx::method_registers.surface_z_pitch();
@ -1935,6 +1982,8 @@ void VKGSRender::prepare_rtts()
m_depth_surface_info.pitch = 0; m_depth_surface_info.pitch = 0;
} }
m_draw_buffers_count = static_cast<u32>(bound_images.size());
if (g_cfg.video.write_color_buffers) if (g_cfg.video.write_color_buffers)
{ {
for (u8 index : draw_buffers) for (u8 index : draw_buffers)
@ -1943,7 +1992,7 @@ void VKGSRender::prepare_rtts()
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height; const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range, m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
m_surface_info[index].width, m_surface_info[index].height); m_surface_info[index].width, m_surface_info[index].height);
} }
} }
@ -1960,10 +2009,59 @@ void VKGSRender::prepare_rtts()
} }
} }
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size()); for (auto &fbo : m_framebuffer_to_clean)
VkRenderPass current_render_pass = m_render_passes[idx]; {
if (fbo->matches(bound_images, clip_width, clip_height))
{
m_draw_fbo.swap(fbo);
m_draw_fbo->reset_refs();
framebuffer_found = true;
//LOG_ERROR(RSX, "Matching framebuffer exists, using that instead");
break;
}
}
m_framebuffer_to_clean.push_back(std::make_unique<vk::framebuffer>(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images))); if (!framebuffer_found)
{
std::vector<std::unique_ptr<vk::image_view>> fbo_images;
fbo_images.reserve(5);
for (u8 index : draw_buffers)
{
vk::image *raw = std::get<1>(m_rtts.m_bound_render_targets[index]);
VkImageSubresourceRange subres = {};
subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
subres.baseArrayLayer = 0;
subres.baseMipLevel = 0;
subres.layerCount = 1;
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
}
if (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr)
{
vk::image *raw = (std::get<1>(m_rtts.m_bound_depth_stencil));
VkImageSubresourceRange subres = {};
subres.aspectMask = (rsx::method_registers.surface_depth_fmt() == rsx::surface_depth_format::z24s8) ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_DEPTH_BIT;
subres.baseArrayLayer = 0;
subres.baseMipLevel = 0;
subres.layerCount = 1;
subres.levelCount = 1;
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
}
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size());
VkRenderPass current_render_pass = m_render_passes[idx];
if (m_draw_fbo)
m_framebuffer_to_clean.push_back(std::move(m_draw_fbo));
m_draw_fbo.reset(new vk::framebuffer_holder(*m_device, current_render_pass, clip_width, clip_height, std::move(fbo_images)));
}
} }
@ -1982,6 +2080,13 @@ void VKGSRender::flip(int buffer)
m_setup_time = 0; m_setup_time = 0;
m_vertex_upload_time = 0; m_vertex_upload_time = 0;
m_textures_upload_time = 0; m_textures_upload_time = 0;
m_uploads_small = 0;
m_uploads_1k = 0;
m_uploads_2k = 0;
m_uploads_4k = 0;
m_uploads_8k = 0;
m_uploads_16k = 0;
} }
return; return;
@ -2061,7 +2166,7 @@ void VKGSRender::flip(int buffer)
vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range); vk::change_image_layout(*m_current_command_buffer, m_swap_chain->get_swap_chain_image(m_current_present_image), VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, range);
} }
std::unique_ptr<vk::framebuffer> direct_fbo; std::unique_ptr<vk::framebuffer_holder> direct_fbo;
std::vector<std::unique_ptr<vk::image_view>> swap_image_view; std::vector<std::unique_ptr<vk::image_view>> swap_image_view;
if (g_cfg.video.overlay) if (g_cfg.video.overlay)
{ {
@ -2083,9 +2188,24 @@ void VKGSRender::flip(int buffer)
size_t idx = vk::get_render_pass_location(m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1); size_t idx = vk::get_render_pass_location(m_swap_chain->get_surface_format(), VK_FORMAT_UNDEFINED, 1);
VkRenderPass single_target_pass = m_render_passes[idx]; VkRenderPass single_target_pass = m_render_passes[idx];
swap_image_view.push_back(std::make_unique<vk::image_view>(*m_device, target_image, VK_IMAGE_VIEW_TYPE_2D, m_swap_chain->get_surface_format(), vk::default_component_map(), subres)); for (auto &It = m_framebuffer_to_clean.begin(); It != m_framebuffer_to_clean.end(); It++)
direct_fbo.reset(new vk::framebuffer(*m_device, single_target_pass, m_client_width, m_client_height, std::move(swap_image_view))); {
auto &fbo = *It;
if (fbo->attachments[0]->info.image == target_image)
{
direct_fbo.swap(fbo);
direct_fbo->reset_refs();
m_framebuffer_to_clean.erase(It);
break;
}
}
if (!direct_fbo)
{
swap_image_view.push_back(std::make_unique<vk::image_view>(*m_device, target_image, VK_IMAGE_VIEW_TYPE_2D, m_swap_chain->get_surface_format(), vk::default_component_map(), subres));
direct_fbo.reset(new vk::framebuffer_holder(*m_device, single_target_pass, m_client_width, m_client_height, std::move(swap_image_view)));
}
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls) + ", instanced repeats: " + std::to_string(m_instanced_draws)); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 0, direct_fbo->width(), direct_fbo->height(), "draw calls: " + std::to_string(m_draw_calls) + ", instanced repeats: " + std::to_string(m_instanced_draws));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 18, direct_fbo->width(), direct_fbo->height(), "draw call setup: " + std::to_string(m_setup_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 36, direct_fbo->width(), direct_fbo->height(), "vertex upload time: " + std::to_string(m_vertex_upload_time) + "us");
@ -2093,10 +2213,29 @@ void VKGSRender::flip(int buffer)
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us"); m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");
//Vertex upload statistics
u32 _small, _1k, _2k, _4k, _8k, _16k;
if (m_draw_calls > 0)
{
_small = m_uploads_small * 100 / m_draw_calls;
_1k = m_uploads_1k * 100 / m_draw_calls;
_2k = m_uploads_2k * 100 / m_draw_calls;
_4k = m_uploads_4k * 100 / m_draw_calls;
_8k = m_uploads_8k * 100 / m_draw_calls;
_16k = m_uploads_16k * 100 / m_draw_calls;
}
else
{
_small = _1k = _2k = _4k = _8k = _16k = 0;
}
std::string message = fmt::format("Vertex sizes: < 1k: %d%%, 1k+: %d%%, 2k+: %d%%, 4k+: %d%%, 8k+: %d%%, 16k+: %d%%", _small, _1k, _2k, _4k, _8k, _16k);
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 108, direct_fbo->width(), direct_fbo->height(), message);
vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres); vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, subres);
m_framebuffer_to_clean.push_back(std::move(direct_fbo));
} }
m_framebuffer_to_clean.push_back(std::move(direct_fbo));
queue_swap_request(); queue_swap_request();
} }
else else
@ -2194,4 +2333,11 @@ void VKGSRender::flip(int buffer)
m_setup_time = 0; m_setup_time = 0;
m_vertex_upload_time = 0; m_vertex_upload_time = 0;
m_textures_upload_time = 0; m_textures_upload_time = 0;
m_uploads_small = 0;
m_uploads_1k = 0;
m_uploads_2k = 0;
m_uploads_4k = 0;
m_uploads_8k = 0;
m_uploads_16k = 0;
} }

View file

@ -149,15 +149,26 @@ private:
vk::descriptor_pool descriptor_pool; vk::descriptor_pool descriptor_pool;
std::vector<std::unique_ptr<vk::buffer_view> > m_buffer_view_to_clean; std::vector<std::unique_ptr<vk::buffer_view> > m_buffer_view_to_clean;
std::vector<std::unique_ptr<vk::framebuffer> > m_framebuffer_to_clean;
std::vector<std::unique_ptr<vk::sampler> > m_sampler_to_clean; std::vector<std::unique_ptr<vk::sampler> > m_sampler_to_clean;
std::list<std::unique_ptr<vk::framebuffer_holder> > m_framebuffer_to_clean;
std::unique_ptr<vk::framebuffer_holder> m_draw_fbo;
u32 m_client_width = 0; u32 m_client_width = 0;
u32 m_client_height = 0; u32 m_client_height = 0;
// Draw call stats
u32 m_draw_calls = 0; u32 m_draw_calls = 0;
u32 m_instanced_draws = 0; u32 m_instanced_draws = 0;
// Vertex buffer usage stats
u32 m_uploads_small = 0;
u32 m_uploads_1k = 0;
u32 m_uploads_2k = 0;
u32 m_uploads_4k = 0;
u32 m_uploads_8k = 0;
u32 m_uploads_16k = 0;
// Timers
s64 m_setup_time = 0; s64 m_setup_time = 0;
s64 m_vertex_upload_time = 0; s64 m_vertex_upload_time = 0;
s64 m_textures_upload_time = 0; s64 m_textures_upload_time = 0;

View file

@ -650,17 +650,17 @@ namespace vk
{ {
VkFramebuffer value; VkFramebuffer value;
VkFramebufferCreateInfo info = {}; VkFramebufferCreateInfo info = {};
std::vector<std::unique_ptr<vk::image_view>> attachements; std::vector<std::unique_ptr<vk::image_view>> attachments;
u32 m_width = 0; u32 m_width = 0;
u32 m_height = 0; u32 m_height = 0;
public: public:
framebuffer(VkDevice dev, VkRenderPass pass, u32 width, u32 height, std::vector<std::unique_ptr<vk::image_view>> &&atts) framebuffer(VkDevice dev, VkRenderPass pass, u32 width, u32 height, std::vector<std::unique_ptr<vk::image_view>> &&atts)
: m_device(dev), attachements(std::move(atts)) : m_device(dev), attachments(std::move(atts))
{ {
std::vector<VkImageView> image_view_array(attachements.size()); std::vector<VkImageView> image_view_array(attachments.size());
size_t i = 0; size_t i = 0;
for (const auto &att : attachements) for (const auto &att : attachments)
{ {
image_view_array[i++] = att->value; image_view_array[i++] = att->value;
} }
@ -694,6 +694,24 @@ namespace vk
return m_height; return m_height;
} }
bool matches(std::vector<vk::image*> fbo_images, u32 width, u32 height)
{
if (m_width != width || m_height != height)
return false;
if (fbo_images.size() != attachments.size())
return false;
for (int n = 0; n < fbo_images.size(); ++n)
{
if (attachments[n]->info.image != fbo_images[n]->value ||
attachments[n]->info.format != fbo_images[n]->info.format)
return false;
}
return true;
}
framebuffer(const framebuffer&) = delete; framebuffer(const framebuffer&) = delete;
framebuffer(framebuffer&&) = delete; framebuffer(framebuffer&&) = delete;

View file

@ -7,12 +7,17 @@
#include "../Common/TextureUtils.h" #include "../Common/TextureUtils.h"
#include "VKFormats.h" #include "VKFormats.h"
// Small mixin holding an ageing counter for objects that are kept around
// for possible reuse before being released.
struct ref_counted
{
	// Number of cleanup passes survived since the object was last reused
	u8 deref_count = 0;

	// Marks the object as freshly used again by clearing the ageing counter
	void reset_refs()
	{
		deref_count = 0;
	}
};
namespace vk namespace vk
{ {
struct render_target : public image struct render_target : public image, public ref_counted
{ {
u8 deref_count = 0;
bool dirty = false; bool dirty = false;
u16 native_pitch = 0; u16 native_pitch = 0;
VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT; VkImageAspectFlags attachment_aspect_flag = VK_IMAGE_ASPECT_COLOR_BIT;
@ -36,6 +41,17 @@ namespace vk
mipmaps, layers, samples, initial_layout, tiling, usage, image_flags) mipmaps, layers, samples, initial_layout, tiling, usage, image_flags)
{} {}
}; };
// A vk::framebuffer combined with the ref_counted ageing counter, so that
// framebuffers can be kept in a cleanup list and aged out or reused.
struct framebuffer_holder : public vk::framebuffer, public ref_counted
{
	// Forwards every argument unchanged to the vk::framebuffer constructor;
	// ownership of the attachment image views is moved into the base class.
	framebuffer_holder(VkDevice dev, VkRenderPass pass, u32 width, u32 height,
		std::vector<std::unique_ptr<vk::image_view>> &&atts)
		: vk::framebuffer(dev, pass, width, height, std::move(atts))
	{
	}
};
} }
namespace rsx namespace rsx
@ -270,9 +286,9 @@ namespace rsx
void free_invalidated() void free_invalidated()
{ {
invalidated_resources.remove_if([](std::unique_ptr<vk::render_target>& rtt) invalidated_resources.remove_if([](std::unique_ptr<vk::render_target> &rtt)
{ {
if (rtt->deref_count > 1) return true; if (rtt->deref_count >= 2) return true;
rtt->deref_count++; rtt->deref_count++;
return false; return false;

View file

@ -477,7 +477,7 @@ namespace
{ {
const auto &vbo = vertex_buffers[i]; const auto &vbo = vertex_buffers[i];
if (vbo.which() == 0 && vertex_count >= g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready()) if (vbo.which() == 0 && vertex_count >= (u32)g_cfg.video.mt_vertex_upload_threshold && vertex_buffers.size() > 1 && rsxthr->vertex_upload_task_ready())
{ {
//vertex array buffer. We can thread this thing heavily //vertex array buffer. We can thread this thing heavily
const auto& v = vbo.get<rsx::vertex_array_buffer>(); const auto& v = vbo.get<rsx::vertex_array_buffer>();

View file

@ -146,8 +146,9 @@ void VKVertexDecompilerThread::insertConstants(std::stringstream & OS, const std
static const vertex_reg_info reg_table[] = static const vertex_reg_info reg_table[] =
{ {
{ "gl_Position", false, "dst_reg0", "", false }, { "gl_Position", false, "dst_reg0", "", false },
{ "back_diff_color", true, "dst_reg1", "", false }, //Technically these two are for both back and front
{ "back_spec_color", true, "dst_reg2", "", false }, { "back_diff_color", true, "dst_reg1", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTDIFFUSE },
{ "back_spec_color", true, "dst_reg2", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_FRONTSPECULAR },
{ "front_diff_color", true, "dst_reg3", "", false }, { "front_diff_color", true, "dst_reg3", "", false },
{ "front_spec_color", true, "dst_reg4", "", false }, { "front_spec_color", true, "dst_reg4", "", false },
{ "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG }, { "fog_c", true, "dst_reg5", ".xxxx", true, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_FOG },
@ -159,15 +160,15 @@ static const vertex_reg_info reg_table[] =
{ "gl_ClipDistance[3]", false, "dst_reg6", ".y * userClipFactor[0].w", false, "userClipEnabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 }, { "gl_ClipDistance[3]", false, "dst_reg6", ".y * userClipFactor[0].w", false, "userClipEnabled[0].w > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC3 },
{ "gl_ClipDistance[4]", false, "dst_reg6", ".z * userClipFactor[1].x", false, "userClipEnabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 }, { "gl_ClipDistance[4]", false, "dst_reg6", ".z * userClipFactor[1].x", false, "userClipEnabled[1].x > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC4 },
{ "gl_ClipDistance[5]", false, "dst_reg6", ".w * userClipFactor[1].y", false, "userClipEnabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 }, { "gl_ClipDistance[5]", false, "dst_reg6", ".w * userClipFactor[1].y", false, "userClipEnabled[1].y > 0", "0.5", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_UC5 },
{ "tc0", true, "dst_reg7", "", false }, { "tc0", true, "dst_reg7", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX0 },
{ "tc1", true, "dst_reg8", "", false }, { "tc1", true, "dst_reg8", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX1 },
{ "tc2", true, "dst_reg9", "", false }, { "tc2", true, "dst_reg9", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX2 },
{ "tc3", true, "dst_reg10", "", false }, { "tc3", true, "dst_reg10", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX3 },
{ "tc4", true, "dst_reg11", "", false }, { "tc4", true, "dst_reg11", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX4 },
{ "tc5", true, "dst_reg12", "", false }, { "tc5", true, "dst_reg12", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX5 },
{ "tc6", true, "dst_reg13", "", false }, { "tc6", true, "dst_reg13", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX6 },
{ "tc7", true, "dst_reg14", "", false }, { "tc7", true, "dst_reg14", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX7 },
{ "tc8", true, "dst_reg15", "", false }, { "tc8", true, "dst_reg15", "", false, "", "", "", false, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX8 },
{ "tc9", true, "dst_reg6", "", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX9 } // In this line, dst_reg6 is correct since dst_reg goes from 0 to 15. { "tc9", true, "dst_reg6", "", false, "", "", "", true, CELL_GCM_ATTRIB_OUTPUT_MASK_TEX9 } // In this line, dst_reg6 is correct since dst_reg goes from 0 to 15.
}; };
@ -195,6 +196,16 @@ void VKVertexDecompilerThread::insertOutputs(std::stringstream & OS, const std::
const vk::varying_register_t &reg = vk::get_varying_register(i.name); const vk::varying_register_t &reg = vk::get_varying_register(i.name);
OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";\n"; OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";\n";
} }
else
{
//Force some outputs to be declared even if unused so we can set default values
//NOTE: Registers that can be skipped will not have their check_mask_value set
if (i.need_declare && (rsx_vertex_program.output_mask & i.check_mask_value) > 0)
{
const vk::varying_register_t &reg = vk::get_varying_register(i.name);
OS << "layout(location=" << reg.reg_location << ") out vec4 " << i.name << ";\n";
}
}
} }
if (insert_back_diffuse && insert_front_diffuse) if (insert_back_diffuse && insert_front_diffuse)