rsx/vk: Improvements and minor optimizations

- Improve dirty state tracking affecting program state
- vk: Refactor transform constants upload out into a separate channel so it can be skipped when possible;
  transform data uploads are quite expensive
This commit is contained in:
kd-11 2018-04-20 23:44:34 +03:00 committed by kd-11
parent 440a31ef18
commit b7979d3f57
8 changed files with 130 additions and 75 deletions

View file

@ -141,18 +141,22 @@ fragment_program_utils::fragment_program_metadata fragment_program_utils::analys
if (program_offset < 0) if (program_offset < 0)
program_offset = instIndex * 16; program_offset = instIndex * 16;
if (opcode == RSX_FP_OPCODE_TEX || switch(opcode)
opcode == RSX_FP_OPCODE_TEXBEM || {
opcode == RSX_FP_OPCODE_TXP || case RSX_FP_OPCODE_TEX:
opcode == RSX_FP_OPCODE_TXPBEM || case RSX_FP_OPCODE_TEXBEM:
opcode == RSX_FP_OPCODE_TXD || case RSX_FP_OPCODE_TXP:
opcode == RSX_FP_OPCODE_TXB || case RSX_FP_OPCODE_TXPBEM:
opcode == RSX_FP_OPCODE_TXL) case RSX_FP_OPCODE_TXD:
case RSX_FP_OPCODE_TXB:
case RSX_FP_OPCODE_TXL:
{ {
//Bits 17-20 of word 1, swapped within u16 sections //Bits 17-20 of word 1, swapped within u16 sections
//Bits 16-23 are swapped into the upper 8 bits (24-31) //Bits 16-23 are swapped into the upper 8 bits (24-31)
const u32 tex_num = (inst.word[0] >> 25) & 15; const u32 tex_num = (inst.word[0] >> 25) & 15;
textures_mask |= (1 << tex_num); textures_mask |= (1 << tex_num);
break;
}
} }
if (is_constant(inst.word[1]) || is_constant(inst.word[2]) || is_constant(inst.word[3])) if (is_constant(inst.word[1]) || is_constant(inst.word[2]) || is_constant(inst.word[3]))

View file

@ -389,17 +389,17 @@ void D3D12GSRender::end()
.Offset((INT)currentDescriptorIndex + vertex_buffer_count, m_descriptor_stride_srv_cbv_uav) .Offset((INT)currentDescriptorIndex + vertex_buffer_count, m_descriptor_stride_srv_cbv_uav)
); );
if (m_transform_constants_dirty && !g_cfg.video.debug_output) if (!g_cfg.video.debug_output && (m_graphics_state & rsx::pipeline_state::transform_constants_dirty))
{ {
m_current_transform_constants_buffer_descriptor_id = (u32)currentDescriptorIndex + 1 + vertex_buffer_count; m_current_transform_constants_buffer_descriptor_id = (u32)currentDescriptorIndex + 1 + vertex_buffer_count;
upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1 + vertex_buffer_count); upload_and_bind_vertex_shader_constants(currentDescriptorIndex + 1 + vertex_buffer_count);
m_transform_constants_dirty = false;
get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(VERTEX_CONSTANT_BUFFERS_SLOT, get_current_resource_storage().command_list->SetGraphicsRootDescriptorTable(VERTEX_CONSTANT_BUFFERS_SLOT,
CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart()) CD3DX12_GPU_DESCRIPTOR_HANDLE(get_current_resource_storage().descriptors_heap->GetGPUDescriptorHandleForHeapStart())
.Offset(m_current_transform_constants_buffer_descriptor_id, m_descriptor_stride_srv_cbv_uav) .Offset(m_current_transform_constants_buffer_descriptor_id, m_descriptor_stride_srv_cbv_uav)
); );
} }
m_graphics_state = 0;
std::chrono::time_point<steady_clock> constants_duration_end = steady_clock::now(); std::chrono::time_point<steady_clock> constants_duration_end = steady_clock::now();
m_timers.constants_duration += std::chrono::duration_cast<std::chrono::microseconds>(constants_duration_end - constants_duration_start).count(); m_timers.constants_duration += std::chrono::duration_cast<std::chrono::microseconds>(constants_duration_end - constants_duration_start).count();

View file

@ -1049,7 +1049,7 @@ bool GLGSRender::check_program_state()
void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
{ {
if (m_fragment_program_dirty || m_vertex_program_dirty) if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
{ {
get_current_fragment_program(fs_sampler_state); get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid; verify(HERE), current_fragment_program.valid;
@ -1091,12 +1091,13 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(current_fragment_program); const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(current_fragment_program);
const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float)); const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float));
const bool update_transform_constants = !!(m_graphics_state & rsx::pipeline_state::transform_constants_dirty);
if (manually_flush_ring_buffers) if (manually_flush_ring_buffers)
{ {
m_vertex_state_buffer->reserve_storage_on_heap(512); m_vertex_state_buffer->reserve_storage_on_heap(512);
m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256)); m_fragment_constants_buffer->reserve_storage_on_heap(align(fragment_buffer_size, 256));
if (m_transform_constants_dirty) m_transform_constants_buffer->reserve_storage_on_heap(8192); if (update_transform_constants) m_transform_constants_buffer->reserve_storage_on_heap(8192);
} }
// Vertex state // Vertex state
@ -1112,7 +1113,7 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max(); *(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset); fill_vertex_layout_state(m_vertex_layout, upload_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160), upload_info.persistent_mapping_offset, upload_info.volatile_mapping_offset);
if (m_transform_constants_dirty) if (update_transform_constants)
{ {
// Vertex constants // Vertex constants
mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align); mapping = m_transform_constants_buffer->alloc_from_heap(8192, m_uniform_buffer_offset_align);
@ -1137,17 +1138,17 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512); m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512);
m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);
if (m_transform_constants_dirty) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192); if (update_transform_constants) m_transform_constants_buffer->bind_range(1, vertex_constants_offset, 8192);
if (manually_flush_ring_buffers) if (manually_flush_ring_buffers)
{ {
m_vertex_state_buffer->unmap(); m_vertex_state_buffer->unmap();
m_fragment_constants_buffer->unmap(); m_fragment_constants_buffer->unmap();
if (m_transform_constants_dirty) m_transform_constants_buffer->unmap(); if (update_transform_constants) m_transform_constants_buffer->unmap();
} }
m_transform_constants_dirty = false; m_graphics_state = 0;
} }
void GLGSRender::update_draw_state() void GLGSRender::update_draw_state()

View file

@ -246,7 +246,8 @@ namespace rsx
m_rtts_dirty = true; m_rtts_dirty = true;
memset(m_textures_dirty, -1, sizeof(m_textures_dirty)); memset(m_textures_dirty, -1, sizeof(m_textures_dirty));
memset(m_vertex_textures_dirty, -1, sizeof(m_vertex_textures_dirty)); memset(m_vertex_textures_dirty, -1, sizeof(m_vertex_textures_dirty));
m_transform_constants_dirty = true;
m_graphics_state = pipeline_state::all_dirty;
} }
thread::~thread() thread::~thread()
@ -1329,10 +1330,10 @@ namespace rsx
void thread::get_current_vertex_program() void thread::get_current_vertex_program()
{ {
if (!m_vertex_program_dirty) if (!(m_graphics_state & rsx::pipeline_state::vertex_program_dirty))
return; return;
m_vertex_program_dirty = false; m_graphics_state &= ~(rsx::pipeline_state::vertex_program_dirty);
const u32 transform_program_start = rsx::method_registers.transform_program_start(); const u32 transform_program_start = rsx::method_registers.transform_program_start();
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
current_vertex_program.skip_vertex_input_check = false; current_vertex_program.skip_vertex_input_check = false;
@ -1544,10 +1545,10 @@ namespace rsx
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors) void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
{ {
if (!m_fragment_program_dirty) if (!(m_graphics_state & rsx::pipeline_state::fragment_program_dirty))
return; return;
m_fragment_program_dirty = false; m_graphics_state &= ~(rsx::pipeline_state::fragment_program_dirty);
auto &result = current_fragment_program = {}; auto &result = current_fragment_program = {};
const u32 shader_program = rsx::method_registers.shader_program_address(); const u32 shader_program = rsx::method_registers.shader_program_address();

View file

@ -69,6 +69,18 @@ namespace rsx
context_clear_all = context_clear_color | context_clear_depth context_clear_all = context_clear_color | context_clear_depth
}; };
// Dirty-state bit flags. Handlers OR these into rsx::thread::m_graphics_state, and the
// renderers test (and eventually clear) them when loading the program for a draw.
enum pipeline_state : u8
{
// Checked and cleared by thread::get_current_fragment_program().
fragment_program_dirty = 1,
// Checked and cleared by thread::get_current_vertex_program().
vertex_program_dirty = 2,
// NOTE(review): only referenced from a commented-out condition in VKGSRender::load_program
// in the visible code; presumably reserved for finer-grained state uploads - confirm.
fragment_state_dirty = 4,
vertex_state_dirty = 8,
// Set when a transform constant write actually changes the stored value; gates the
// transform constants upload in the renderers.
transform_constants_dirty = 16,
// Either program is dirty; load_program() tests this mask before re-fetching the programs.
invalidate_pipeline_bits = fragment_program_dirty | vertex_program_dirty,
// Every bit of the u8 set; assigned to m_graphics_state to invalidate everything.
all_dirty = 255
};
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size); u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size);
u32 get_address(u32 offset, u32 location); u32 get_address(u32 offset, u32 location);
@ -327,12 +339,10 @@ namespace rsx
u32 local_mem_addr, main_mem_addr; u32 local_mem_addr, main_mem_addr;
bool m_rtts_dirty; bool m_rtts_dirty;
bool m_transform_constants_dirty;
bool m_textures_dirty[16]; bool m_textures_dirty[16];
bool m_vertex_textures_dirty[4]; bool m_vertex_textures_dirty[4];
bool m_framebuffer_state_contested = false; bool m_framebuffer_state_contested = false;
bool m_fragment_program_dirty = false; u32 m_graphics_state = 0;
bool m_vertex_program_dirty = false;
protected: protected:
std::array<u32, 4> get_color_surface_addresses() const; std::array<u32, 4> get_color_surface_addresses() const;

View file

@ -593,6 +593,8 @@ VKGSRender::VKGSRender() : GSRender()
m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0)); m_attrib_ring_info.heap.reset(new vk::buffer(*m_device, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, 0));
m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer"); m_uniform_buffer_ring_info.init(VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer");
m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0)); m_uniform_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
m_transform_constants_ring_info.init(VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
m_transform_constants_ring_info.heap.reset(new vk::buffer(*m_device, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, 0));
m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_index_buffer_ring_info.init(VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0)); m_index_buffer_ring_info.heap.reset(new vk::buffer(*m_device, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, memory_map.host_visible_coherent, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, 0));
m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); m_texture_upload_buffer_ring_info.init(VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
@ -688,6 +690,7 @@ VKGSRender::~VKGSRender()
//Heaps //Heaps
m_index_buffer_ring_info.heap.reset(); m_index_buffer_ring_info.heap.reset();
m_uniform_buffer_ring_info.heap.reset(); m_uniform_buffer_ring_info.heap.reset();
m_transform_constants_ring_info.heap.reset();
m_attrib_ring_info.heap.reset(); m_attrib_ring_info.heap.reset();
m_texture_upload_buffer_ring_info.heap.reset(); m_texture_upload_buffer_ring_info.heap.reset();
@ -893,6 +896,7 @@ void VKGSRender::check_heap_status()
if (m_attrib_ring_info.is_critical() || if (m_attrib_ring_info.is_critical() ||
m_texture_upload_buffer_ring_info.is_critical() || m_texture_upload_buffer_ring_info.is_critical() ||
m_uniform_buffer_ring_info.is_critical() || m_uniform_buffer_ring_info.is_critical() ||
m_transform_constants_ring_info.is_critical() ||
m_index_buffer_ring_info.is_critical()) m_index_buffer_ring_info.is_critical())
{ {
std::chrono::time_point<steady_clock> submit_start = steady_clock::now(); std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
@ -917,6 +921,7 @@ void VKGSRender::check_heap_status()
m_index_buffer_ring_info.reset_allocation_stats(); m_index_buffer_ring_info.reset_allocation_stats();
m_uniform_buffer_ring_info.reset_allocation_stats(); m_uniform_buffer_ring_info.reset_allocation_stats();
m_transform_constants_ring_info.reset_allocation_stats();
m_attrib_ring_info.reset_allocation_stats(); m_attrib_ring_info.reset_allocation_stats();
m_texture_upload_buffer_ring_info.reset_allocation_stats(); m_texture_upload_buffer_ring_info.reset_allocation_stats();
m_current_frame->reset_heap_ptrs(); m_current_frame->reset_heap_ptrs();
@ -1938,6 +1943,7 @@ void VKGSRender::advance_queued_frames()
m_vertex_cache->purge(); m_vertex_cache->purge();
m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(), m_current_frame->tag_frame_end(m_attrib_ring_info.get_current_put_pos_minus_one(),
m_uniform_buffer_ring_info.get_current_put_pos_minus_one(), m_uniform_buffer_ring_info.get_current_put_pos_minus_one(),
m_transform_constants_ring_info.get_current_put_pos_minus_one(),
m_index_buffer_ring_info.get_current_put_pos_minus_one(), m_index_buffer_ring_info.get_current_put_pos_minus_one(),
m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one()); m_texture_upload_buffer_ring_info.get_current_put_pos_minus_one());
@ -2045,11 +2051,13 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
//Heap cleanup; deallocates memory consumed by the frame if it is still held //Heap cleanup; deallocates memory consumed by the frame if it is still held
m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr; m_attrib_ring_info.m_get_pos = ctx->attrib_heap_ptr;
m_uniform_buffer_ring_info.m_get_pos = ctx->ubo_heap_ptr; m_uniform_buffer_ring_info.m_get_pos = ctx->ubo_heap_ptr;
m_transform_constants_ring_info.m_get_pos = ctx->vtxconst_heap_ptr;
m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr; m_index_buffer_ring_info.m_get_pos = ctx->index_heap_ptr;
m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr; m_texture_upload_buffer_ring_info.m_get_pos = ctx->texture_upload_heap_ptr;
m_attrib_ring_info.notify(); m_attrib_ring_info.notify();
m_uniform_buffer_ring_info.notify(); m_uniform_buffer_ring_info.notify();
m_transform_constants_ring_info.notify();
m_index_buffer_ring_info.notify(); m_index_buffer_ring_info.notify();
m_texture_upload_buffer_ring_info.notify(); m_texture_upload_buffer_ring_info.notify();
} }
@ -2209,7 +2217,7 @@ bool VKGSRender::check_program_status()
void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
{ {
if (m_fragment_program_dirty || m_vertex_program_dirty) if (m_graphics_state & rsx::pipeline_state::invalidate_pipeline_bits)
{ {
get_current_fragment_program(fs_sampler_state); get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid; verify(HERE), current_fragment_program.valid;
@ -2219,6 +2227,7 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
auto &vertex_program = current_vertex_program; auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program; auto &fragment_program = current_fragment_program;
auto old_program = m_program;
vk::pipeline_props properties = {}; vk::pipeline_props properties = {};
@ -2372,49 +2381,66 @@ void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
vk::leave_uninterruptible(); vk::leave_uninterruptible();
const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program); if (1)//m_graphics_state & (rsx::pipeline_state::fragment_state_dirty | rsx::pipeline_state::vertex_state_dirty))
const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float));
const size_t required_mem = 512 + 8192 + fragment_buffer_sz;
const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem);
const size_t vertex_constants_offset = vertex_state_offset + 512;
const size_t fragment_constants_offset = vertex_constants_offset + 8192;
//We do this in one go
u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem);
//Vertex state
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = vertex_info.vertex_index_base;
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160),
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
//Vertex constants
buf = buf + 512;
fill_vertex_program_constants_data(buf);
m_transform_constants_dirty = false;
//Fragment constants
buf = buf + 8192;
if (fragment_constants_sz)
{ {
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) }, const size_t fragment_constants_sz = m_prog_buffer->get_fragment_constants_buffer_size(fragment_program);
fragment_program, vk::sanitize_fp_values()); const size_t fragment_buffer_sz = fragment_constants_sz + (18 * 4 * sizeof(float));
const size_t required_mem = 512 + fragment_buffer_sz;
const size_t vertex_state_offset = m_uniform_buffer_ring_info.alloc<256>(required_mem);
const size_t fragment_constants_offset = vertex_state_offset + 512;
//We do this in one go
u8 *buf = (u8*)m_uniform_buffer_ring_info.map(vertex_state_offset, required_mem);
//Vertex state
fill_scale_offset_data(buf, false);
fill_user_clip_data(buf + 64);
*(reinterpret_cast<u32*>(buf + 128)) = rsx::method_registers.transform_branch_bits();
*(reinterpret_cast<u32*>(buf + 132)) = vertex_info.vertex_index_base;
*(reinterpret_cast<f32*>(buf + 136)) = rsx::method_registers.point_size();
*(reinterpret_cast<f32*>(buf + 140)) = rsx::method_registers.clip_min();
*(reinterpret_cast<f32*>(buf + 144)) = rsx::method_registers.clip_max();
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, reinterpret_cast<s32*>(buf + 160),
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
//Fragment constants
buf = buf + 512;
if (fragment_constants_sz)
{
m_prog_buffer->fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), ::narrow<int>(fragment_constants_sz) },
fragment_program, vk::sanitize_fp_values());
}
fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program);
m_uniform_buffer_ring_info.unmap();
m_vertex_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 };
m_fragment_state_buffer_info = { m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz };
} }
fill_fragment_state_buffer(buf + fragment_constants_sz, fragment_program); if (m_graphics_state & rsx::pipeline_state::transform_constants_dirty)
{
//Vertex constants
const size_t vertex_constants_offset = m_transform_constants_ring_info.alloc<256>(8192);
auto buf = m_transform_constants_ring_info.map(vertex_constants_offset, 8192);
m_uniform_buffer_ring_info.unmap(); fill_vertex_program_constants_data(buf);
m_transform_constants_ring_info.unmap();
m_vertex_constants_buffer_info = { m_transform_constants_ring_info.heap->value, vertex_constants_offset, 8192 };
}
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_state_offset, 512 }, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set); if (1)//m_graphics_state || old_program != m_program)
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, vertex_constants_offset, 8192 }, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); {
m_program->bind_uniform({ m_uniform_buffer_ring_info.heap->value, fragment_constants_offset, fragment_buffer_sz }, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set); m_program->bind_uniform(m_vertex_state_buffer_info, SCALE_OFFSET_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_vertex_constants_buffer_info, VERTEX_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
m_program->bind_uniform(m_fragment_state_buffer_info, FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT, m_current_frame->descriptor_set);
}
//Clear flags
m_graphics_state = 0;
} }
static const u32 mr_color_offset[rsx::limits::color_buffers_count] = static const u32 mr_color_offset[rsx::limits::color_buffers_count] =

View file

@ -40,7 +40,8 @@ namespace vk
//NOTE: Texture uploads can be huge, upto 16MB for a single texture (4096x4096px) //NOTE: Texture uploads can be huge, upto 16MB for a single texture (4096x4096px)
#define VK_ATTRIB_RING_BUFFER_SIZE_M 384 #define VK_ATTRIB_RING_BUFFER_SIZE_M 384
#define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256 #define VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M 256
#define VK_UBO_RING_BUFFER_SIZE_M 128 #define VK_UBO_RING_BUFFER_SIZE_M 64
#define VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M 64
#define VK_INDEX_RING_BUFFER_SIZE_M 64 #define VK_INDEX_RING_BUFFER_SIZE_M 64
#define VK_MAX_ASYNC_CB_COUNT 64 #define VK_MAX_ASYNC_CB_COUNT 64
@ -152,6 +153,7 @@ struct frame_context_t
//Heap pointers //Heap pointers
s64 attrib_heap_ptr = 0; s64 attrib_heap_ptr = 0;
s64 ubo_heap_ptr = 0; s64 ubo_heap_ptr = 0;
s64 vtxconst_heap_ptr = 0;
s64 index_heap_ptr = 0; s64 index_heap_ptr = 0;
s64 texture_upload_heap_ptr = 0; s64 texture_upload_heap_ptr = 0;
@ -167,6 +169,7 @@ struct frame_context_t
attrib_heap_ptr = other.attrib_heap_ptr; attrib_heap_ptr = other.attrib_heap_ptr;
ubo_heap_ptr = other.attrib_heap_ptr; ubo_heap_ptr = other.attrib_heap_ptr;
vtxconst_heap_ptr = other.vtxconst_heap_ptr;
index_heap_ptr = other.attrib_heap_ptr; index_heap_ptr = other.attrib_heap_ptr;
texture_upload_heap_ptr = other.texture_upload_heap_ptr; texture_upload_heap_ptr = other.texture_upload_heap_ptr;
} }
@ -178,10 +181,11 @@ struct frame_context_t
std::swap(samplers_to_clean, other.samplers_to_clean); std::swap(samplers_to_clean, other.samplers_to_clean);
} }
void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 index_loc, s64 texture_loc) void tag_frame_end(s64 attrib_loc, s64 ubo_loc, s64 vtxconst_loc, s64 index_loc, s64 texture_loc)
{ {
attrib_heap_ptr = attrib_loc; attrib_heap_ptr = attrib_loc;
ubo_heap_ptr = ubo_loc; ubo_heap_ptr = ubo_loc;
vtxconst_heap_ptr = vtxconst_loc;
index_heap_ptr = index_loc; index_heap_ptr = index_loc;
texture_upload_heap_ptr = texture_loc; texture_upload_heap_ptr = texture_loc;
@ -314,9 +318,14 @@ private:
u64 m_last_heap_sync_time = 0; u64 m_last_heap_sync_time = 0;
vk::vk_data_heap m_attrib_ring_info; vk::vk_data_heap m_attrib_ring_info;
vk::vk_data_heap m_uniform_buffer_ring_info; vk::vk_data_heap m_uniform_buffer_ring_info;
vk::vk_data_heap m_transform_constants_ring_info;
vk::vk_data_heap m_index_buffer_ring_info; vk::vk_data_heap m_index_buffer_ring_info;
vk::vk_data_heap m_texture_upload_buffer_ring_info; vk::vk_data_heap m_texture_upload_buffer_ring_info;
VkDescriptorBufferInfo m_vertex_state_buffer_info;
VkDescriptorBufferInfo m_vertex_constants_buffer_info;
VkDescriptorBufferInfo m_fragment_state_buffer_info;
std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage; std::array<frame_context_t, VK_MAX_ASYNC_FRAMES> frame_context_storage;
//Temp frame context to use if the real frame queue is overburdened. Only used for storage //Temp frame context to use if the real frame queue is overburdened. Only used for storage
frame_context_t m_aux_frame_context; frame_context_t m_aux_frame_context;

View file

@ -342,12 +342,17 @@ namespace rsx
u32 load = rsx::method_registers.transform_constant_load(); u32 load = rsx::method_registers.transform_constant_load();
if ((load + index) >= 512) if ((load + index) >= 512)
{ {
LOG_ERROR(RSX, "Invalid register index (load=%d, index=%d)", load, index); LOG_ERROR(RSX, "Invalid transform register index (load=%d, index=%d)", load, index);
return; return;
} }
rsx::method_registers.transform_constants[load + reg][subreg] = arg; auto &value = rsx::method_registers.transform_constants[load + reg][subreg];
rsxthr->m_transform_constants_dirty = true; if (value != arg)
{
//Transform constants invalidation is expensive (~8k bytes per update)
value = arg;
rsxthr->m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;
}
} }
}; };
@ -357,19 +362,18 @@ namespace rsx
static void impl(thread* rsx, u32 _reg, u32 arg) static void impl(thread* rsx, u32 _reg, u32 arg)
{ {
method_registers.commit_4_transform_program_instructions(index); method_registers.commit_4_transform_program_instructions(index);
rsx->m_vertex_program_dirty = true; rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty;
} }
}; };
void set_transform_program_start(thread* rsx, u32, u32) void set_transform_program_start(thread* rsx, u32, u32)
{ {
rsx->m_vertex_program_dirty = true; rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty;
} }
void set_vertex_attribute_output_mask(thread* rsx, u32, u32) void set_vertex_attribute_output_mask(thread* rsx, u32, u32)
{ {
rsx->m_vertex_program_dirty = true; rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_dirty | rsx::pipeline_state::fragment_program_dirty;
rsx->m_fragment_program_dirty = true;
} }
void set_begin_end(thread* rsxthr, u32 _reg, u32 arg) void set_begin_end(thread* rsxthr, u32 _reg, u32 arg)
@ -535,7 +539,7 @@ namespace rsx
void invalidate_L2(thread* rsx, u32, u32) void invalidate_L2(thread* rsx, u32, u32)
{ {
rsx->m_fragment_program_dirty = true; rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
} }
void set_surface_dirty_bit(thread* rsx, u32, u32) void set_surface_dirty_bit(thread* rsx, u32, u32)
@ -556,7 +560,7 @@ namespace rsx
static void impl(thread* rsx, u32 _reg, u32 arg) static void impl(thread* rsx, u32 _reg, u32 arg)
{ {
rsx->m_textures_dirty[index] = true; rsx->m_textures_dirty[index] = true;
rsx->m_fragment_program_dirty = true; rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
} }
}; };
@ -584,7 +588,7 @@ namespace rsx
u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062()); u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062());
vm::write32(address, arg); vm::write32(address, arg);
rsx->m_fragment_program_dirty = true; rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
} }
}; };
} }