rsx: vp decompiler fixes

- Fix program abort logic to never abort before resolving later label addresses
  Fixes jumping over broken code and jumping over END markers
- TEXTURE_CONTROL2 has indexing range of [0..15] without stride skipping!
  This register does not have interleaving with other texture registers
- Track shader address poke as it seems to invalidate programs as well
This commit is contained in:
kd-11 2018-07-01 20:55:49 +03:00 committed by kd-11
parent 66854b78fa
commit bd915bfebd
2 changed files with 33 additions and 17 deletions

View file

@ -428,6 +428,7 @@ std::string VertexProgramDecompiler::Decompile()
bool is_has_BRA = false; bool is_has_BRA = false;
bool program_end = false; bool program_end = false;
u32 i = 1; u32 i = 1;
u32 last_label_addr = 0;
while (i < m_data.size()) while (i < m_data.size())
{ {
@ -443,31 +444,38 @@ std::string VertexProgramDecompiler::Decompile()
switch (d1.sca_opcode) switch (d1.sca_opcode)
{ {
case RSX_SCA_OPCODE_BRA: case RSX_SCA_OPCODE_BRA:
{
LOG_ERROR(RSX, "Unimplemented VP opcode BRA"); LOG_ERROR(RSX, "Unimplemented VP opcode BRA");
is_has_BRA = true; is_has_BRA = true;
m_jump_lvls.clear(); m_jump_lvls.clear();
d3.HEX = m_data[++i]; d3.HEX = m_data[++i];
i += 4; i += 4;
break; break;
}
case RSX_SCA_OPCODE_BRB: case RSX_SCA_OPCODE_BRB:
case RSX_SCA_OPCODE_BRI: case RSX_SCA_OPCODE_BRI:
case RSX_SCA_OPCODE_CAL: case RSX_SCA_OPCODE_CAL:
case RSX_SCA_OPCODE_CLI: case RSX_SCA_OPCODE_CLI:
case RSX_SCA_OPCODE_CLB: case RSX_SCA_OPCODE_CLB:
{
d2.HEX = m_data[i++]; d2.HEX = m_data[i++];
d3.HEX = m_data[i]; d3.HEX = m_data[i];
i += 2; i += 2;
m_jump_lvls.emplace(GetAddr());
break;
const u32 label_addr = GetAddr();
last_label_addr = std::max(last_label_addr, label_addr);
m_jump_lvls.emplace(label_addr);
break;
}
default: default:
{
d3.HEX = m_data[++i]; d3.HEX = m_data[++i];
i += 2; i += 2;
break; break;
} }
} }
} }
}
uint jump_position = 0; uint jump_position = 0;
if (is_has_BRA || !m_jump_lvls.empty()) if (is_has_BRA || !m_jump_lvls.empty())
@ -565,8 +573,7 @@ std::string VertexProgramDecompiler::Decompile()
if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type) if (!src[0].reg_type || !src[1].reg_type || !src[2].reg_type)
{ {
AddCode("//Src check failed. Aborting"); AddCode("//Src check failed. Aborting");
do_program_exit(true); program_end = true;
break;
} }
if (m_call_stack.empty()) if (m_call_stack.empty())
@ -584,8 +591,6 @@ std::string VertexProgramDecompiler::Decompile()
} }
} }
program_end = !!d3.end;
switch (d1.vec_opcode) switch (d1.vec_opcode)
{ {
case RSX_VEC_OPCODE_NOP: break; case RSX_VEC_OPCODE_NOP: break;
@ -640,7 +645,7 @@ std::string VertexProgramDecompiler::Decompile()
{ {
if (m_call_stack.empty()) if (m_call_stack.empty())
{ {
AddCode("$if ($cond) //BRA"); AddCode("$ifcond //BRA");
AddCode("{"); AddCode("{");
m_cur_instr->open_scopes++; m_cur_instr->open_scopes++;
AddCode("jump_position = $a$am;"); AddCode("jump_position = $a$am;");
@ -698,8 +703,6 @@ std::string VertexProgramDecompiler::Decompile()
case RSX_SCA_OPCODE_BRB: case RSX_SCA_OPCODE_BRB:
// works differently (BRB o[1].x !b0, L0;) // works differently (BRB o[1].x !b0, L0;)
{ {
LOG_WARNING(RSX, "sca_opcode BRB, d0=0x%X, d1=0x%X, d2=0x%X, d3=0x%X", d0.HEX, d1.HEX, d2.HEX, d3.HEX);
if (m_call_stack.empty()) if (m_call_stack.empty())
{ {
u32 jump_position = find_jump_lvl(GetAddr()); u32 jump_position = find_jump_lvl(GetAddr());
@ -742,12 +745,22 @@ std::string VertexProgramDecompiler::Decompile()
break; break;
} }
if (program_end) if (program_end || !!d3.end)
{ {
do_program_exit(!d3.end); do_program_exit(!d3.end);
if (i >= last_label_addr)
{
if ((i + 1) < m_instr_count)
{
// In rare cases, this might be harmless (large coalesced program blocks controlled via branches aka ubershaders)
LOG_ERROR(RSX, "Vertex program aborted prematurely. Expect glitches");
}
break; break;
} }
} }
}
if (is_has_BRA || !m_jump_lvls.empty()) if (is_has_BRA || !m_jump_lvls.empty())
{ {

View file

@ -340,9 +340,11 @@ namespace rsx
static constexpr u32 reg = index / 4; static constexpr u32 reg = index / 4;
static constexpr u8 subreg = index % 4; static constexpr u8 subreg = index % 4;
u32 load = rsx::method_registers.transform_constant_load(); const u32 load = rsx::method_registers.transform_constant_load();
if ((load + index) >= 512) const u32 address = load + reg;
if (address >= 468)
{ {
// Ignore addresses outside the usable [0, 467] range
LOG_ERROR(RSX, "Invalid transform register index (load=%d, index=%d)", load, index); LOG_ERROR(RSX, "Invalid transform register index (load=%d, index=%d)", load, index);
return; return;
} }
@ -538,7 +540,7 @@ namespace rsx
rsx->sync(); rsx->sync();
} }
void invalidate_L2(thread* rsx, u32, u32) void set_shader_program_dirty(thread* rsx, u32, u32)
{ {
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
} }
@ -1757,7 +1759,7 @@ namespace rsx
bind_range<NV4097_SET_TEXTURE_ADDRESS, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_ADDRESS, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_CONTROL0, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_CONTROL0, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_CONTROL1, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_CONTROL1, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_CONTROL2, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_CONTROL2, 1, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_CONTROL3, 1, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_CONTROL3, 1, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_FILTER, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_FILTER, 8, 16, nv4097::set_texture_dirty_bit>();
bind_range<NV4097_SET_TEXTURE_IMAGE_RECT, 8, 16, nv4097::set_texture_dirty_bit>(); bind_range<NV4097_SET_TEXTURE_IMAGE_RECT, 8, 16, nv4097::set_texture_dirty_bit>();
@ -1782,7 +1784,8 @@ namespace rsx
bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>(); bind<NV4097_WAIT_FOR_IDLE, nv4097::sync>();
bind<NV4097_ZCULL_SYNC, nv4097::sync>(); bind<NV4097_ZCULL_SYNC, nv4097::sync>();
bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>(); bind<NV4097_SET_CONTEXT_DMA_REPORT, nv4097::sync>();
bind<NV4097_INVALIDATE_L2, nv4097::invalidate_L2>(); bind<NV4097_INVALIDATE_L2, nv4097::set_shader_program_dirty>();
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>(); bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>(); bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();