SPU: Refactor function discovery

This commit is contained in:
Eladash 2023-08-29 14:50:50 +03:00 committed by Elad Ashkenazi
parent 105c5759f3
commit 37212a632c
4 changed files with 35 additions and 26 deletions

View file

@ -383,7 +383,7 @@ void spu_load_exec(const spu_exec_object& elf)
spu->status_npc = {SPU_STATUS_RUNNING, elf.header.e_entry}; spu->status_npc = {SPU_STATUS_RUNNING, elf.header.e_entry};
atomic_storage<u32>::release(spu->pc, elf.header.e_entry); atomic_storage<u32>::release(spu->pc, elf.header.e_entry);
const auto funcs = spu->discover_functions(spu->ls, umax); const auto funcs = spu->discover_functions(0, { spu->ls , SPU_LS_SIZE }, true, umax);
for (u32 addr : funcs) for (u32 addr : funcs)
{ {

View file

@ -561,10 +561,7 @@ extern void utilize_spu_data_segment(u32 vaddr, const void* ls_data_vaddr, u32 s
spu_section_data::data_t obj{vaddr, std::move(data)}; spu_section_data::data_t obj{vaddr, std::move(data)};
std::vector<u8> ls_data(SPU_LS_SIZE); obj.funcs = spu_thread::discover_functions(vaddr, { reinterpret_cast<const u8*>(ls_data_vaddr), size }, true, umax);
std::memcpy(ls_data.data() + vaddr, ls_data_vaddr, size);
obj.funcs = spu_thread::discover_functions(ls_data.data(), umax);
if (obj.funcs.empty()) if (obj.funcs.empty())
{ {
@ -2111,7 +2108,7 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/
} }
} }
std::vector<u32> spu_thread::discover_functions(const void* ls_start, u32 /*entry*/) std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/)
{ {
std::vector<u32> calls; std::vector<u32> calls;
calls.reserve(100); calls.reserve(100);
@ -2119,14 +2116,16 @@ std::vector<u32> spu_thread::discover_functions(const void* ls_start, u32 /*entr
// Discover functions // Discover functions
// Use the simplest method: search for instructions that call them // Use the simplest method: search for instructions that call them
// And then filter invalid cases (does not detect tail calls) // And then filter invalid cases (does not detect tail calls)
for (u32 i = 0x10; i < SPU_LS_SIZE; i += 0x10) const v128 brasl_mask = is_known_addr ? v128::from32p(0x62) : v128::from32p(umax);
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
{ {
// Search for BRSL and BRASL // Search for BRSL and BRASL
// TODO: BISL // TODO: BISL
const v128 inst = read_from_ptr<be_t<v128>>(static_cast<const u8*>(ls_start), i); const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
const v128 shifted = gv_shr32(inst, 23); const v128 shifted = gv_shr32(inst, 23);
const v128 eq_brsl = gv_eq32(shifted, v128::from32p(0x66)); const v128 eq_brsl = gv_eq32(shifted, v128::from32p(0x66));
const v128 eq_brasl = gv_eq32(shifted, v128::from32p(0x62)); const v128 eq_brasl = gv_eq32(shifted, brasl_mask);
const v128 result = eq_brsl | eq_brasl; const v128 result = eq_brsl | eq_brasl;
if (!gv_testz(result)) if (!gv_testz(result))
@ -2144,14 +2143,14 @@ std::vector<u32> spu_thread::discover_functions(const void* ls_start, u32 /*entr
calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller) calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller)
{ {
// Check the validity of both the callee code and the following caller code // Check the validity of both the callee code and the following caller code
return !is_exec_code(caller, ls_start) || !is_exec_code(caller + 4, ls_start); return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr);
}), calls.end()); }), calls.end());
std::vector<u32> addrs; std::vector<u32> addrs;
for (u32 addr : calls) for (u32 addr : calls)
{ {
const spu_opcode_t op{read_from_ptr<be_t<u32>>(static_cast<const u8*>(ls_start), addr)}; const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};
const u32 func = op_branch_targets(addr, op)[0]; const u32 func = op_branch_targets(addr, op)[0];

View file

@ -1196,7 +1196,7 @@ void spu_thread::dump_regs(std::string& ret, std::any& /*custom_data*/) const
} }
} }
if (i3 >= 0x80 && is_exec_code(i3, ls)) if (i3 >= 0x80 && is_exec_code(i3, { ls, SPU_LS_SIZE }))
{ {
dis_asm.disasm(i3); dis_asm.disasm(i3);
fmt::append(ret, " -> %s", dis_asm.last_opcode); fmt::append(ret, " -> %s", dis_asm.last_opcode);
@ -1300,7 +1300,7 @@ std::vector<std::pair<u32, u32>> spu_thread::dump_callstack_list() const
return true; return true;
} }
return !addr || !is_exec_code(addr, ls); return !addr || !is_exec_code(addr, { ls, SPU_LS_SIZE });
}; };
if (first && lr._u32[3] != gpr0._u32[3] && !is_invalid(gpr0)) if (first && lr._u32[3] != gpr0._u32[3] && !is_invalid(gpr0))
@ -4019,17 +4019,22 @@ bool spu_thread::check_mfc_interrupts(u32 next_pc)
return false; return false;
} }
bool spu_thread::is_exec_code(u32 addr, const void* ls_ptr) bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_addr)
{ {
if (addr & ~0x3FFFC)
{
return false;
}
for (u32 i = 0; i < 30; i++) for (u32 i = 0; i < 30; i++)
{ {
if (addr & ~0x3FFFC)
{
return false;
}
if (addr < base_addr || addr >= base_addr + ls_ptr.size())
{
return false;
}
const u32 addr0 = spu_branch_target(addr); const u32 addr0 = spu_branch_target(addr);
const u32 op = read_from_ptr<be_t<u32>>(static_cast<const u8*>(ls_ptr) + addr0); const u32 op = read_from_ptr<be_t<u32>>(ls_ptr, addr0 - base_addr);
const auto type = s_spu_itype.decode(op); const auto type = s_spu_itype.decode(op);
if (type == spu_itype::UNK || !op) if (type == spu_itype::UNK || !op)
@ -4055,9 +4060,14 @@ bool spu_thread::is_exec_code(u32 addr, const void* ls_ptr)
continue; continue;
} }
if (route_pc < base_addr || route_pc >= base_addr + ls_ptr.size())
{
return false;
}
// Test the validity of a single instruction of the optional target // Test the validity of a single instruction of the optional target
// This function can't be too slow and is unlikely to improve results by a great deal // This function can't be too slow and is unlikely to improve results by a great deal
const u32 op0 = read_from_ptr<be_t<u32>>(static_cast<const u8*>(ls_ptr) + route_pc); const u32 op0 = read_from_ptr<be_t<u32>>(ls_ptr, route_pc - base_addr);
const auto type0 = s_spu_itype.decode(op); const auto type0 = s_spu_itype.decode(op);
if (type == spu_itype::UNK || !op) if (type == spu_itype::UNK || !op)
@ -6151,12 +6161,12 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span<spu_memory_segment_d
{ {
for (pc0 = pc_hint; pc0; pc0 -= 4) for (pc0 = pc_hint; pc0; pc0 -= 4)
{ {
const u32 op = read_from_ptr<be_t<u32>>(all_data.data(), pc0 - 4); const u32 op = read_from_ptr<be_t<u32>>(all_data, pc0 - 4);
// Try to find the function entry (if functions are placed sequentially, search for the BI $LR of the previous function)
if (!op || op == 0x35000000u || s_spu_itype.decode(op) == spu_itype::UNK) if (!op || op == 0x35000000u || s_spu_itype.decode(op) == spu_itype::UNK)
{ {
if (is_exec_code(pc0, all_data.data())) if (is_exec_code(pc0, { all_data.data(), SPU_LS_SIZE }))
break; break;
} }
} }
@ -6166,7 +6176,7 @@ spu_exec_object spu_thread::capture_memory_as_elf(std::span<spu_memory_segment_d
for (pc0 = 0; pc0 < SPU_LS_SIZE; pc0 += 4) for (pc0 = 0; pc0 < SPU_LS_SIZE; pc0 += 4)
{ {
// Try to find a function entry (very basic) // Try to find a function entry (very basic)
if (is_exec_code(pc0, all_data.data())) if (is_exec_code(pc0, { all_data.data(), SPU_LS_SIZE }))
break; break;
} }
} }

View file

@ -825,8 +825,8 @@ public:
void set_events(u32 bits); void set_events(u32 bits);
void set_interrupt_status(bool enable); void set_interrupt_status(bool enable);
bool check_mfc_interrupts(u32 next_pc); bool check_mfc_interrupts(u32 next_pc);
static bool is_exec_code(u32 addr, const void* ls_ptr); // Only a hint, do not rely on it other than debugging purposes static bool is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_addr = 0); // Only a hint, do not rely on it other than debugging purposes
static std::vector<u32> discover_functions(const void* ls_start, u32 /*entry*/); static std::vector<u32> discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/);
u32 get_ch_count(u32 ch); u32 get_ch_count(u32 ch);
s64 get_ch_value(u32 ch); s64 get_ch_value(u32 ch);
bool set_ch_value(u32 ch, u32 value); bool set_ch_value(u32 ch, u32 value);