rsx: Improve fragment and vertex program usage

- Introduces a GPU program analyser step to examine shader contents before attempting compilation or cache search
  - Avoids treating a shader as different merely because unused textures had state changes
  - Adds better program size detection for vertex programs
- Improved vertex program decompiler
  - Properly support CAL type instructions
  - Support BRA/CAL class opcodes jumping over instructions marked with a termination marker
  - Fix SRC checks and abort
  - Fix CC register initialization
  - NOTE: Even unused SRC registers have to be valid (usually referencing in.POS)
This commit is contained in:
kd-11 2018-03-20 14:14:45 +03:00 committed by kd-11
parent 75b40931fc
commit a52ea7f870
9 changed files with 287 additions and 206 deletions

View file

@ -1338,23 +1338,11 @@ namespace rsx
u32* ucode_src = rsx::method_registers.transform_program.data() + (transform_program_start * 4);
u32* ucode_dst = current_vertex_program.data.data();
u32 ucode_size = 0;
D3 d3;
for (int i = transform_program_start; i < 512; ++i)
{
ucode_size += 4;
memcpy(ucode_dst, ucode_src, 4 * sizeof(u32));
memcpy(ucode_dst, ucode_src, current_vertex_program.data.size() * sizeof(u32));
d3.HEX = ucode_src[3];
if (d3.end)
break;
ucode_src += 4;
ucode_dst += 4;
}
current_vertex_program.data.resize(ucode_size);
auto program_info = program_hash_util::vertex_program_utils::analyse_vertex_program(current_vertex_program.data);
current_vertex_program.data.resize(program_info.ucode_size);
const u32 input_mask = rsx::method_registers.vertex_attrib_input_mask();
const u32 modulo_mask = rsx::method_registers.frequency_divider_operation_mask();
@ -1562,10 +1550,10 @@ namespace rsx
const u32 program_offset = (shader_program & ~0x3);
result.addr = vm::base(rsx::get_address(program_offset, program_location));
auto program_start = program_hash_util::fragment_program_utils::get_fragment_program_start(result.addr);
const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
result.addr = ((u8*)result.addr + program_start);
result.offset = program_offset + program_start;
result.addr = ((u8*)result.addr + program_info.program_start_offset);
result.offset = program_offset + program_info.program_start_offset;
result.valid = true;
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.unnormalized_coords = 0;
@ -1577,7 +1565,6 @@ namespace rsx
result.redirected_textures = 0;
result.shadow_textures = 0;
std::array<texture_dimension_extended, 16> texture_dimensions;
const auto resolution_scale = rsx::get_resolution_scale();
for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i)
@ -1587,14 +1574,10 @@ namespace rsx
result.texture_scale[i][1] = sampler_descriptors[i]->scale_y;
result.texture_scale[i][2] = (f32)tex.remap(); //Debug value
if (!tex.enabled())
{
texture_dimensions[i] = texture_dimension_extended::texture_dimension_2d;
}
else
if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i)))
{
u32 texture_control = 0;
texture_dimensions[i] = sampler_descriptors[i]->image_type;
result.texture_dimensions |= ((u32)sampler_descriptors[i]->image_type << (i << 1));
if (tex.alpha_kill_enabled())
{
@ -1669,8 +1652,6 @@ namespace rsx
}
}
result.set_texture_dimension(texture_dimensions);
//Sanity checks
if (result.ctrl & CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT)
{
@ -1694,10 +1675,10 @@ namespace rsx
const u32 program_offset = (shader_program & ~0x3);
result.addr = vm::base(rsx::get_address(program_offset, program_location));
auto program_start = program_hash_util::fragment_program_utils::get_fragment_program_start(result.addr);
auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
result.addr = ((u8*)result.addr + program_start);
result.offset = program_offset + program_start;
result.addr = ((u8*)result.addr + program_info.program_start_offset);
result.offset = program_offset + program_info.program_start_offset;
result.valid = true;
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
result.unnormalized_coords = 0;
@ -1709,7 +1690,6 @@ namespace rsx
result.redirected_textures = 0;
result.shadow_textures = 0;
std::array<texture_dimension_extended, 16> texture_dimensions;
const auto resolution_scale = rsx::get_resolution_scale();
for (u32 i = 0; i < rsx::limits::fragment_textures_count; ++i)
@ -1720,13 +1700,9 @@ namespace rsx
result.textures_alpha_kill[i] = 0;
result.textures_zfunc[i] = 0;
if (!tex.enabled())
if (tex.enabled() && (program_info.referenced_textures_mask & (1 << i)))
{
texture_dimensions[i] = texture_dimension_extended::texture_dimension_2d;
}
else
{
texture_dimensions[i] = tex.get_extended_texture_dimension();
result.texture_dimensions |= ((u32)tex.get_extended_texture_dimension() << (i << 1));
if (tex.alpha_kill_enabled())
{
@ -1801,8 +1777,6 @@ namespace rsx
}
}
}
result.set_texture_dimension(texture_dimensions);
}
void thread::reset()