mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-12 17:58:37 +12:00
rsx: Optimizations for program management
This commit is contained in:
parent
a52ea7f870
commit
440a31ef18
5 changed files with 81 additions and 39 deletions
|
@ -1048,25 +1048,24 @@ bool GLGSRender::check_program_state()
|
||||||
}
|
}
|
||||||
|
|
||||||
void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
|
void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
|
||||||
|
{
|
||||||
|
if (m_fragment_program_dirty || m_vertex_program_dirty)
|
||||||
{
|
{
|
||||||
get_current_fragment_program(fs_sampler_state);
|
get_current_fragment_program(fs_sampler_state);
|
||||||
verify(HERE), current_fragment_program.valid;
|
verify(HERE), current_fragment_program.valid;
|
||||||
|
|
||||||
get_current_vertex_program();
|
get_current_vertex_program();
|
||||||
|
|
||||||
auto &fragment_program = current_fragment_program;
|
current_vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
|
||||||
auto &vertex_program = current_vertex_program;
|
current_fragment_program.unnormalized_coords = 0; //unused
|
||||||
|
|
||||||
vertex_program.skip_vertex_input_check = true; //not needed for us since decoding is done server side
|
|
||||||
fragment_program.unnormalized_coords = 0; //unused
|
|
||||||
void* pipeline_properties = nullptr;
|
void* pipeline_properties = nullptr;
|
||||||
|
|
||||||
m_program = &m_prog_buffer.getGraphicPipelineState(vertex_program, fragment_program, pipeline_properties);
|
m_program = &m_prog_buffer.getGraphicPipelineState(current_vertex_program, current_fragment_program, pipeline_properties);
|
||||||
m_program->use();
|
m_program->use();
|
||||||
|
|
||||||
if (m_prog_buffer.check_cache_missed())
|
if (m_prog_buffer.check_cache_missed())
|
||||||
{
|
{
|
||||||
m_shaders_cache->store(pipeline_properties, vertex_program, fragment_program);
|
m_shaders_cache->store(pipeline_properties, current_vertex_program, current_fragment_program);
|
||||||
|
|
||||||
//Notify the user with HUD notification
|
//Notify the user with HUD notification
|
||||||
if (g_cfg.misc.show_shader_compilation_hint)
|
if (g_cfg.misc.show_shader_compilation_hint)
|
||||||
|
@ -1083,13 +1082,14 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
u8 *buf;
|
u8 *buf;
|
||||||
u32 vertex_state_offset;
|
u32 vertex_state_offset;
|
||||||
u32 vertex_constants_offset;
|
u32 vertex_constants_offset;
|
||||||
u32 fragment_constants_offset;
|
u32 fragment_constants_offset;
|
||||||
|
|
||||||
const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(fragment_program);
|
const u32 fragment_constants_size = (const u32)m_prog_buffer.get_fragment_constants_buffer_size(current_fragment_program);
|
||||||
const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float));
|
const u32 fragment_buffer_size = fragment_constants_size + (18 * 4 * sizeof(float));
|
||||||
|
|
||||||
if (manually_flush_ring_buffers)
|
if (manually_flush_ring_buffers)
|
||||||
|
@ -1128,11 +1128,11 @@ void GLGSRender::load_program(const gl::vertex_upload_info& upload_info)
|
||||||
if (fragment_constants_size)
|
if (fragment_constants_size)
|
||||||
{
|
{
|
||||||
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) },
|
m_prog_buffer.fill_fragment_constants_buffer({ reinterpret_cast<float*>(buf), gsl::narrow<int>(fragment_constants_size) },
|
||||||
fragment_program, gl::get_driver_caps().vendor_NVIDIA);
|
current_fragment_program, gl::get_driver_caps().vendor_NVIDIA);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fragment state
|
// Fragment state
|
||||||
fill_fragment_state_buffer(buf+fragment_constants_size, fragment_program);
|
fill_fragment_state_buffer(buf+fragment_constants_size, current_fragment_program);
|
||||||
|
|
||||||
m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512);
|
m_vertex_state_buffer->bind_range(0, vertex_state_offset, 512);
|
||||||
m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);
|
m_fragment_constants_buffer->bind_range(2, fragment_constants_offset, fragment_buffer_size);
|
||||||
|
|
|
@ -1329,6 +1329,10 @@ namespace rsx
|
||||||
|
|
||||||
void thread::get_current_vertex_program()
|
void thread::get_current_vertex_program()
|
||||||
{
|
{
|
||||||
|
if (!m_vertex_program_dirty)
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_vertex_program_dirty = false;
|
||||||
const u32 transform_program_start = rsx::method_registers.transform_program_start();
|
const u32 transform_program_start = rsx::method_registers.transform_program_start();
|
||||||
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
|
current_vertex_program.output_mask = rsx::method_registers.vertex_attrib_output_mask();
|
||||||
current_vertex_program.skip_vertex_input_check = false;
|
current_vertex_program.skip_vertex_input_check = false;
|
||||||
|
@ -1341,8 +1345,8 @@ namespace rsx
|
||||||
|
|
||||||
memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32));
|
memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32));
|
||||||
|
|
||||||
auto program_info = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data);
|
current_vp_metadata = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data);
|
||||||
current_vertex_program.data.resize(program_info.ucode_size);
|
current_vertex_program.data.resize(current_vp_metadata.ucode_size);
|
||||||
|
|
||||||
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
|
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
|
||||||
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
|
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||||
|
@ -1540,20 +1544,27 @@ namespace rsx
|
||||||
|
|
||||||
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
|
void thread::get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors)
|
||||||
{
|
{
|
||||||
|
if (!m_fragment_program_dirty)
|
||||||
|
return;
|
||||||
|
|
||||||
|
m_fragment_program_dirty = false;
|
||||||
auto &result = current_fragment_program = {};
|
auto &result = current_fragment_program = {};
|
||||||
|
|
||||||
const u32 shader_program = rsx::method_registers.shader_program_address();
|
const u32 shader_program = rsx::method_registers.shader_program_address();
|
||||||
if (shader_program == 0)
|
if (shader_program == 0)
|
||||||
|
{
|
||||||
|
current_fp_metadata = {};
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const u32 program_location = (shader_program & 0x3) - 1;
|
const u32 program_location = (shader_program & 0x3) - 1;
|
||||||
const u32 program_offset = (shader_program & ~0x3);
|
const u32 program_offset = (shader_program & ~0x3);
|
||||||
|
|
||||||
result.addr = vm::base(rsx::get_address(program_offset, program_location));
|
result.addr = vm::base(rsx::get_address(program_offset, program_location));
|
||||||
const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
|
current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
|
||||||
|
|
||||||
result.addr = ((u8*)result.addr + program_info.program_start_offset);
|
result.addr = ((u8*)result.addr + current_fp_metadata.program_start_offset);
|
||||||
result.offset = program_offset + program_info.program_start_offset;
|
result.offset = program_offset + current_fp_metadata.program_start_offset;
|
||||||
result.valid = true;
|
result.valid = true;
|
||||||
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
|
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
|
||||||
result.unnormalized_coords = 0;
|
result.unnormalized_coords = 0;
|
||||||
|
@ -1574,7 +1585,7 @@ namespace rsx
|
||||||
result.texture_scale[i][1] = sampler_descriptors[i]->scale_y;
|
result.texture_scale[i][1] = sampler_descriptors[i]->scale_y;
|
||||||
result.texture_scale[i][2] = (f32)tex.remap(); //Debug value
|
result.texture_scale[i][2] = (f32)tex.remap(); //Debug value
|
||||||
|
|
||||||
if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i)))
|
if (tex.enabled() && (current_fp_metadata.referenced_textures_mask & (1 << i)))
|
||||||
{
|
{
|
||||||
u32 texture_control = 0;
|
u32 texture_control = 0;
|
||||||
result.texture_dimensions |= ((u32)sampler_descriptors[i]->image_type << (i << 1));
|
result.texture_dimensions |= ((u32)sampler_descriptors[i]->image_type << (i << 1));
|
||||||
|
|
|
@ -331,6 +331,8 @@ namespace rsx
|
||||||
bool m_textures_dirty[16];
|
bool m_textures_dirty[16];
|
||||||
bool m_vertex_textures_dirty[4];
|
bool m_vertex_textures_dirty[4];
|
||||||
bool m_framebuffer_state_contested = false;
|
bool m_framebuffer_state_contested = false;
|
||||||
|
bool m_fragment_program_dirty = false;
|
||||||
|
bool m_vertex_program_dirty = false;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::array<u32, 4> get_color_surface_addresses() const;
|
std::array<u32, 4> get_color_surface_addresses() const;
|
||||||
|
@ -344,6 +346,9 @@ namespace rsx
|
||||||
RSXVertexProgram current_vertex_program = {};
|
RSXVertexProgram current_vertex_program = {};
|
||||||
RSXFragmentProgram current_fragment_program = {};
|
RSXFragmentProgram current_fragment_program = {};
|
||||||
|
|
||||||
|
program_hash_util::fragment_program_utils::fragment_program_metadata current_fp_metadata = {};
|
||||||
|
program_hash_util::vertex_program_utils::vertex_program_metadata current_vp_metadata = {};
|
||||||
|
|
||||||
void get_current_vertex_program();
|
void get_current_vertex_program();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -2208,11 +2208,14 @@ bool VKGSRender::check_program_status()
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
|
void VKGSRender::load_program(const vk::vertex_upload_info& vertex_info)
|
||||||
|
{
|
||||||
|
if (m_fragment_program_dirty || m_vertex_program_dirty)
|
||||||
{
|
{
|
||||||
get_current_fragment_program(fs_sampler_state);
|
get_current_fragment_program(fs_sampler_state);
|
||||||
verify(HERE), current_fragment_program.valid;
|
verify(HERE), current_fragment_program.valid;
|
||||||
|
|
||||||
get_current_vertex_program();
|
get_current_vertex_program();
|
||||||
|
}
|
||||||
|
|
||||||
auto &vertex_program = current_vertex_program;
|
auto &vertex_program = current_vertex_program;
|
||||||
auto &fragment_program = current_fragment_program;
|
auto &fragment_program = current_fragment_program;
|
||||||
|
|
|
@ -357,9 +357,21 @@ namespace rsx
|
||||||
static void impl(thread* rsx, u32 _reg, u32 arg)
|
static void impl(thread* rsx, u32 _reg, u32 arg)
|
||||||
{
|
{
|
||||||
method_registers.commit_4_transform_program_instructions(index);
|
method_registers.commit_4_transform_program_instructions(index);
|
||||||
|
rsx->m_vertex_program_dirty = true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void set_transform_program_start(thread* rsx, u32, u32)
|
||||||
|
{
|
||||||
|
rsx->m_vertex_program_dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_vertex_attribute_output_mask(thread* rsx, u32, u32)
|
||||||
|
{
|
||||||
|
rsx->m_vertex_program_dirty = true;
|
||||||
|
rsx->m_fragment_program_dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
void set_begin_end(thread* rsxthr, u32 _reg, u32 arg)
|
void set_begin_end(thread* rsxthr, u32 _reg, u32 arg)
|
||||||
{
|
{
|
||||||
if (arg)
|
if (arg)
|
||||||
|
@ -521,6 +533,11 @@ namespace rsx
|
||||||
rsx->sync();
|
rsx->sync();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void invalidate_L2(thread* rsx, u32, u32)
|
||||||
|
{
|
||||||
|
rsx->m_fragment_program_dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
void set_surface_dirty_bit(thread* rsx, u32, u32)
|
void set_surface_dirty_bit(thread* rsx, u32, u32)
|
||||||
{
|
{
|
||||||
rsx->m_rtts_dirty = true;
|
rsx->m_rtts_dirty = true;
|
||||||
|
@ -539,6 +556,7 @@ namespace rsx
|
||||||
static void impl(thread* rsx, u32 _reg, u32 arg)
|
static void impl(thread* rsx, u32 _reg, u32 arg)
|
||||||
{
|
{
|
||||||
rsx->m_textures_dirty[index] = true;
|
rsx->m_textures_dirty[index] = true;
|
||||||
|
rsx->m_fragment_program_dirty = true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -565,6 +583,8 @@ namespace rsx
|
||||||
const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2);
|
const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x << 2);
|
||||||
u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062());
|
u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + index * 4, method_registers.blit_engine_output_location_nv3062());
|
||||||
vm::write32(address, arg);
|
vm::write32(address, arg);
|
||||||
|
|
||||||
|
rsx->m_fragment_program_dirty = true;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -1711,6 +1731,9 @@ namespace rsx
|
||||||
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
|
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
|
||||||
bind<NV4097_ZCULL_SYNC, nv4097::sync>();
|
bind<NV4097_ZCULL_SYNC, nv4097::sync>();
|
||||||
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
|
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
|
||||||
|
bind<NV4097_INVALIDATE_L2, nv4097::invalidate_L2>();
|
||||||
|
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
|
||||||
|
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
|
||||||
|
|
||||||
//NV308A
|
//NV308A
|
||||||
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
|
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue