rsx: Optimizations for program management

This commit is contained in:
kd-11 2018-04-10 18:06:29 +03:00 committed by kd-11
parent a52ea7f870
commit 440a31ef18
5 changed files with 81 additions and 39 deletions

View file

@ -1048,25 +1048,24 @@ bool GLGSRender::check_program_state()
} }
void GLGSRender::load_program(const gl::vertex_upload_info& upload_info) void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
{
if (m_fragment_program_dirty || m_vertex_program_dirty)
{ {
get_current_fragment_program(fs_sampler_state); get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid; verify(HERE), current_fragment_program.valid;
get_current_vertex_program(); get_current_vertex_program();
auto &fragment_program = current_fragment_program; current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
auto &vertex_program = current_vertex_program; current_fragment_program.unnormalized_coords = 0; //unused
vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
fragment_program.unnormalized_coords = 0; //unused
void* pipeline_properties = nullptr; void* pipeline_properties = nullptr;
m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, pipeline_properties); m_program = &m_prog_buffer.getGraphicPipelineState(current_vertex_program, current_fragment_program, pipeline_properties);
m_program->use(); m_program->use();
if (m_prog_buffer.check_cache_missed()) if (m_prog_buffer.check_cache_missed())
{ {
m_shaders_cache->store(pipeline_properties, vertex_program, fragment_program); m_shaders_cache->store(pipeline_properties, current_vertex_program, current_fragment_program);
//Notify the user with HUD notification //Notify the user with HUD notification
if (g_cfg.misc.show_shader_compilation_hint) if (g_cfg.misc.show_shader_compilation_hint)
@ -1083,13 +1082,14 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
} }
} }
} }
}
u8 *buf; u8 *buf;
u32 vertex_state_offset; u32 vertex_state_offset;
u32 vertex_constants_offset; u32 vertex_constants_offset;
u32 fragment_constants_offset; u32 fragment_constants_offset;
const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(fragment_program); const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(current_fragment_program);
const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float)); const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float));
if (manually_flush_ring_buffers) if (manually_flush_ring_buffers)
@ -1128,11 +1128,11 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
if (fragment_constants_size) if (fragment_constants_size)
{ {
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) }, m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) },
fragment_program, gl::get_driver_caps().vendor_NVIDIA); current_fragment_program, gl::get_driver_caps().vendor_NVIDIA);
} }
// Fragment state // Fragment state
fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program); fill_fragment_state_buffer(buf+fragment_constants_size, current_fragment_program);
m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512); m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512);
m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size); m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);

View file

@ -1329,6 +1329,10 @@ namespace rsx
void thread::get_current_vertex_program() void thread::get_current_vertex_program()
{ {
if (!m_vertex_program_dirty)
return;
m_vertex_program_dirty = false;
const u32 transform_program_start = rsx::method_registers.transform_program_start(); const u32 transform_program_start = rsx::method_registers.transform_program_start();
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask(); current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
current_vertex_program.skip_vertex_input_check = false; current_vertex_program.skip_vertex_input_check = false;
@ -1341,8 +1345,8 @@ namespace rsx
memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32)); memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32));
auto program_info = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data); current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data);
current_vertex_program.data.resize(program_info.ucode_size); current_vertex_program.data.resize(current_vp_metadata.ucode_size);
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask(); const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask(); const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
@ -1540,20 +1544,27 @@ namespace rsx
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors) void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
{ {
if (!m_fragment_program_dirty)
return;
m_fragment_program_dirty = false;
auto &result = current_fragment_program = {}; auto &result = current_fragment_program = {};
const u32 shader_program = rsx::method_registers.shader_program_address(); const u32 shader_program = rsx::method_registers.shader_program_address();
if (shader_program == 0) if (shader_program == 0)
{
current_fp_metadata = {};
return; return;
}
const u32 program_location = (shader_program & 0x3) - 1; const u32 program_location = (shader_program & 0x3) - 1;
const u32 program_offset = (shader_program & ~0x3); const u32 program_offset = (shader_program & ~0x3);
result.addr = vm::base(rsx::get_address(program_offset, program_location)); result.addr = vm::base(rsx::get_address(program_offset, program_location));
const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr); current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
result.addr = ((u8*)result.addr + program_info.program_start_offset); result.addr = ((u8*)result.addr + current_fp_metadata.program_start_offset);
result.offset = program_offset + program_info.program_start_offset; result.offset = program_offset + current_fp_metadata.program_start_offset;
result.valid = true; result.valid = true;
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT); result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.unnormalized_coords = 0; result.unnormalized_coords = 0;
@ -1574,7 +1585,7 @@ namespace rsx
result.texture_scale[i][1] = sampler_descriptors[i]->scale_y; result.texture_scale[i][1] = sampler_descriptors[i]->scale_y;
result.texture_scale[i][2] = (f32)tex.remap(); //Debug value result.texture_scale[i][2] = (f32)tex.remap(); //Debug value
if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i))) if (tex.enabled() && (current_fp_metadata.referenced_textures_mask & (1 << i)))
{ {
u32 texture_control = 0; u32 texture_control = 0;
result.texture_dimensions |= ((u32)sampler_descriptors[i]->image_type << (i << 1)); result.texture_dimensions |= ((u32)sampler_descriptors[i]->image_type << (i << 1));

View file

@ -331,6 +331,8 @@ namespace rsx
bool m_textures_dirty[16]; bool m_textures_dirty[16];
bool m_vertex_textures_dirty[4]; bool m_vertex_textures_dirty[4];
bool m_framebuffer_state_contested = false; bool m_framebuffer_state_contested = false;
bool m_fragment_program_dirty = false;
bool m_vertex_program_dirty = false;
protected: protected:
std::array<u32, 4> get_color_surface_addresses() const; std::array<u32, 4> get_color_surface_addresses() const;
@ -344,6 +346,9 @@ namespace rsx
RSXVertexProgram current_vertex_program = {}; RSXVertexProgram current_vertex_program = {};
RSXFragmentProgram current_fragment_program = {}; RSXFragmentProgram current_fragment_program = {};
program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {};
program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {};
void get_current_vertex_program(); void get_current_vertex_program();
/** /**

View file

@ -2208,11 +2208,14 @@ bool VKGSRender::check_program_status()
} }
void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info) void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
{
if (m_fragment_program_dirty || m_vertex_program_dirty)
{ {
get_current_fragment_program(fs_sampler_state); get_current_fragment_program(fs_sampler_state);
verify(HERE), current_fragment_program.valid; verify(HERE), current_fragment_program.valid;
get_current_vertex_program(); get_current_vertex_program();
}
auto &vertex_program = current_vertex_program; auto &vertex_program = current_vertex_program;
auto &fragment_program = current_fragment_program; auto &fragment_program = current_fragment_program;

View file

@ -357,9 +357,21 @@ namespace rsx
static void impl(thread* rsx, u32 _reg, u32 arg) static void impl(thread* rsx, u32 _reg, u32 arg)
{ {
method_registers.commit_4_transform_program_instructions(index); method_registers.commit_4_transform_program_instructions(index);
rsx->m_vertex_program_dirty = true;
} }
}; };
void set_transform_program_start(thread* rsx, u32, u32)
{
rsx->m_vertex_program_dirty = true;
}
void set_vertex_attribute_output_mask(thread* rsx, u32, u32)
{
rsx->m_vertex_program_dirty = true;
rsx->m_fragment_program_dirty = true;
}
void set_begin_end(thread* rsxthr, u32 _reg, u32 arg) void set_begin_end(thread* rsxthr, u32 _reg, u32 arg)
{ {
if (arg) if (arg)
@ -521,6 +533,11 @@ namespace rsx
rsx->sync(); rsx->sync();
} }
void invalidate_L2(thread* rsx, u32, u32)
{
rsx->m_fragment_program_dirty = true;
}
void set_surface_dirty_bit(thread* rsx, u32, u32) void set_surface_dirty_bit(thread* rsx, u32, u32)
{ {
rsx->m_rtts_dirty = true; rsx->m_rtts_dirty = true;
@ -539,6 +556,7 @@ namespace rsx
static void impl(thread* rsx, u32 _reg, u32 arg) static void impl(thread* rsx, u32 _reg, u32 arg)
{ {
rsx->m_textures_dirty[index] = true; rsx->m_textures_dirty[index] = true;
rsx->m_fragment_program_dirty = true;
} }
}; };
@ -565,6 +583,8 @@ namespace rsx
const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2); const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2);
u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062()); u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062());
vm::write32(address, arg); vm::write32(address, arg);
rsx->m_fragment_program_dirty = true;
} }
}; };
} }
@ -1711,6 +1731,9 @@ namespace rsx
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>(); bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
bind<NV4097_ZCULL_SYNC, nv4097::sync>(); bind<NV4097_ZCULL_SYNC, nv4097::sync>();
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>(); bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
bind<NV4097_INVALIDATE_L2, nv4097::invalidate_L2>();
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
//NV308A //NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>(); bind_range<NV308A_COLOR, 1, 256, nv308a::color>();