Embedded SPU Image Search Fixes

This commit is contained in:
elad335 2025-04-28 16:11:08 +03:00 committed by Elad
parent 0e1cad4ca9
commit 730249a887
3 changed files with 62 additions and 51 deletions

View file

@ -1171,7 +1171,9 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
auto find_first_of_multiple = [](std::string_view data, std::initializer_list<std::string_view> values, usz index) auto find_first_of_multiple = [](std::string_view data, std::initializer_list<std::string_view> values, usz index)
{ {
u32 pos = static_cast<u32>(data.size()); u32 pos = umax;
ensure(data.size() <= pos && index <= data.size());
for (std::string_view value : values) for (std::string_view value : values)
{ {
@ -1191,65 +1193,74 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
u32 prev_bound = 0; u32 prev_bound = 0;
for (u32 i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, i + 4)) for (u32 prefix_addr = find_first_of_multiple(seg_view, prefixes, 0); prefix_addr < seg.size; prefix_addr = find_first_of_multiple(seg_view, prefixes, prefix_addr + 4))
{ {
const auto elf_header = ensure(mod.get_ptr<u8>(seg.addr + i)); const auto elf_header = ensure(mod.get_ptr<u8>(seg.addr + prefix_addr));
if (std::memcmp(elf_header, "\x24\0\x40\x80", 4) == 0) if (std::memcmp(elf_header, "\x24\0\x40\x80", 4) == 0)
{ {
bool next = true; const u32 old_prefix_addr = prefix_addr;
const u32 old_i = i;
u32 guid_start = umax, guid_end = umax;
for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries = search == 0 ? 0 : tries - 1, search = utils::sub_saturate<u32>(search, 128)) auto search_guid_pattern = [&](u32 index, std::string_view data_span, s32 advance_index, u32 lower_bound, u32 uppper_bound) -> u32
{ {
if (seg_view[search] != 0x42 && seg_view[search] != 0x43) for (u32 search = index & -16, tries = 16 * 64; tries && search >= lower_bound && search < uppper_bound; tries = tries - 1, search = advance_index < 0 ? utils::sub_saturate<u32>(search, 0 - advance_index) : search + advance_index)
{ {
continue; if (seg_view[search] != 0x42 && seg_view[search] != 0x43)
{
continue;
}
const u32 inst1 = read_from_ptr<be_t<u32>>(data_span, search);
const u32 inst2 = read_from_ptr<be_t<u32>>(data_span, search + 4);
const u32 inst3 = read_from_ptr<be_t<u32>>(data_span, search + 8);
const u32 inst4 = read_from_ptr<be_t<u32>>(data_span, search + 12);
if ((inst1 & 0xfe'00'00'7f) != 0x42000002 || (inst2 & 0xfe'00'00'7f) != 0x42000002 || (inst3 & 0xfe'00'00'7f) != 0x42000002 || (inst4 & 0xfe'00'00'7f) != 0x42000002)
{
continue;
}
if (!spu_thread::is_exec_code(search, {reinterpret_cast<const u8*>(data_span.data()), data_span.size()}, 0, true, true))
{
continue;
}
return search;
} }
const u32 inst1 = read_from_ptr<be_t<u32>>(seg_view, search); return umax;
const u32 inst2 = read_from_ptr<be_t<u32>>(seg_view, search + 4); };
const u32 inst3 = read_from_ptr<be_t<u32>>(seg_view, search + 8);
const u32 inst4 = read_from_ptr<be_t<u32>>(seg_view, search + 12);
if ((inst1 & 0xfe'00'00'7f) != 0x42000002 || (inst2 & 0xfe'00'00'7f) != 0x42000002 || (inst3 & 0xfe'00'00'7f) != 0x42000002 || (inst4 & 0xfe'00'00'7f) != 0x42000002) prefix_addr = search_guid_pattern(prefix_addr, seg_view, -16, prev_bound, seg.size);
{
continue;
}
guid_start = search + seg.addr; if (prefix_addr == umax)
i = search;
next = false;
break;
}
if (next)
{ {
prefix_addr = old_prefix_addr;
continue; continue;
} }
std::string_view ls_segment = seg_view.substr(i); u32 guid_start = seg.addr + prefix_addr, guid_end = umax;
std::string_view ls_segment = seg_view.substr(prefix_addr);
// Bound to a bit less than LS size // Bound to a bit less than LS size
ls_segment = ls_segment.substr(0, SPU_LS_SIZE - 0x8000); ls_segment = ls_segment.substr(0, SPU_LS_SIZE - 0x8000);
for (u32 addr_last = 0, valid_count = 0, invalid_count = 0;;) for (u32 addr_last = 0, valid_count = 0, invalid_count = 0;;)
{ {
const u32 instruction = find_first_of_multiple(ls_segment, prefixes, addr_last); const u32 instruction = std::min<u32>(search_guid_pattern(addr_last, ls_segment, +16, 0, ::size32(ls_segment)), find_first_of_multiple(ls_segment, prefixes, addr_last));
if (instruction != umax && std::memcmp(ls_segment.data() + instruction, "\x24\0\x40\x80", 4) == 0) if (instruction != umax && std::memcmp(ls_segment.data() + instruction, "\x24\0\x40\x80", 4) == 0)
{ {
if (instruction % 4 != i % 4) if (instruction % 4 != prefix_addr % 4)
{ {
// Unaligned, continue // Unaligned, continue
addr_last = instruction + (i % 4 - instruction % 4) % 4; addr_last = instruction + (prefix_addr % 4 - instruction % 4) % 4;
continue; continue;
} }
// FIXME: This seems to terminate SPU code prematurely in some cases // Check execution compatibility
// Likely due to absolute branches if (spu_thread::is_exec_code(instruction, {reinterpret_cast<const u8*>(ls_segment.data()), ls_segment.size()}, 0, true, true))
if (spu_thread::is_exec_code(instruction, {reinterpret_cast<const u8*>(ls_segment.data()), ls_segment.size()}, 0))
{ {
addr_last = instruction + 4; addr_last = instruction + 4;
valid_count++; valid_count++;
@ -1270,7 +1281,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
if (addr_last >= 0x80 && valid_count >= 2) if (addr_last >= 0x80 && valid_count >= 2)
{ {
u32 end = std::min<u32>({instruction, seg.size - i, utils::align<u32>(addr_last + 256, 128)}); u32 end = std::min<u32>({instruction, seg.size - prefix_addr, utils::align<u32>(addr_last + 256, 128)});
u32 guessed_ls_addr = 0; u32 guessed_ls_addr = 0;
@ -1320,27 +1331,27 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
if (guessed_ls_addr) if (guessed_ls_addr)
{ {
end = i + std::min<u32>(end, SPU_LS_SIZE - guessed_ls_addr); end = prefix_addr + std::min<u32>(end, SPU_LS_SIZE - guessed_ls_addr);
} }
else else
{ {
end = i + std::min<u32>(end, SPU_LS_SIZE); end = prefix_addr + std::min<u32>(end, SPU_LS_SIZE);
} }
ppu_log.success("Found valid roaming SPU code at 0x%x..0x%x (guessed_ls_addr=0x%x, GUID=0x%05x..0x%05x)", seg.addr + i, seg.addr + end, guessed_ls_addr, guid_start, guid_end); ppu_log.success("Found valid roaming SPU code at 0x%x..0x%x (guessed_ls_addr=0x%x, GUID=0x%05x..0x%05x)", seg.addr + prefix_addr, seg.addr + end, guessed_ls_addr, guid_start, guid_end);
if (!is_firmware && _main == &mod) if (!is_firmware && _main == &mod)
{ {
// Signify that the base address is unknown by passing 0 // Signify that the base address is unknown by passing 0
utilize_spu_data_segment(guessed_ls_addr ? guessed_ls_addr : 0x4000, seg_view.data() + i, end - i); utilize_spu_data_segment(guessed_ls_addr ? guessed_ls_addr : 0x4000, seg_view.data() + prefix_addr, end - prefix_addr);
} }
i = std::max<u32>(end, i + 4) - 4; prefix_addr = std::max<u32>(end, prefix_addr + 4) - 4;
prev_bound = i + 4; prev_bound = prefix_addr + 4;
} }
else else
{ {
i = old_i; prefix_addr = old_prefix_addr;
} }
break; break;
@ -1350,7 +1361,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
} }
// Try to load SPU image // Try to load SPU image
const spu_exec_object obj(fs::file(elf_header, seg.size - i)); const spu_exec_object obj(fs::file(elf_header, seg.size - prefix_addr));
if (obj != elf_error::ok) if (obj != elf_error::ok)
{ {
@ -1395,7 +1406,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
sha1_update(&sha2, (elf_header + prog.p_offset), prog.p_filesz); sha1_update(&sha2, (elf_header + prog.p_offset), prog.p_filesz);
// We assume that the string SPUNAME exists 0x14 bytes into the NOTE segment // We assume that the string SPUNAME exists 0x14 bytes into the NOTE segment
name = ensure(mod.get_ptr<const char>(seg.addr + i + prog.p_offset + 0x14)); name = ensure(mod.get_ptr<const char>(seg.addr + prefix_addr + prog.p_offset + 0x14));
if (!name.empty()) if (!name.empty())
{ {
@ -1404,7 +1415,7 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
} }
} }
fmt::append(dump, " (image addr: 0x%x, size: 0x%x)", seg.addr + i, obj.highest_offset); fmt::append(dump, " (image addr: 0x%x, size: 0x%x)", seg.addr + prefix_addr, obj.highest_offset);
sha1_finish(&sha2, sha1_hash); sha1_finish(&sha2, sha1_hash);
@ -1454,8 +1465,8 @@ static void ppu_check_patch_spu_images(const ppu_module<lv2_obj>& mod, const ppu
ppu_loader.success("SPU executable hash: %s (<- %u)%s", hash, applied.size(), dump); ppu_loader.success("SPU executable hash: %s (<- %u)%s", hash, applied.size(), dump);
} }
i += ::narrow<u32>(obj.highest_offset - 4); prefix_addr += ::narrow<u32>(obj.highest_offset - 4);
prev_bound = i + 4; prev_bound = prefix_addr + 4;
} }
} }

View file

@ -4327,7 +4327,7 @@ bool spu_thread::check_mfc_interrupts(u32 next_pc)
return false; return false;
} }
bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_addr, bool avoid_dead_code) bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_addr, bool avoid_dead_code, bool is_range_limited)
{ {
bool had_conditional = false; bool had_conditional = false;
@ -4335,12 +4335,12 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
{ {
if (addr & ~0x3FFFC) if (addr & ~0x3FFFC)
{ {
return false; return is_range_limited;
} }
if (addr < base_addr || addr >= base_addr + ls_ptr.size()) if (addr < base_addr || addr >= base_addr + ls_ptr.size())
{ {
return false; return is_range_limited;
} }
const u32 addr0 = spu_branch_target(addr); const u32 addr0 = spu_branch_target(addr);
@ -4450,12 +4450,12 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
{ {
if (addr < 0u - rel) if (addr < 0u - rel)
{ {
return false; return is_range_limited;
} }
} }
else if (SPU_LS_SIZE - addr <= rel + 0u) else if (SPU_LS_SIZE - addr <= rel + 0u)
{ {
return false; return is_range_limited;
} }
if (type == spu_itype::BRSL) if (type == spu_itype::BRSL)
@ -4485,7 +4485,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
if (route_pc < base_addr || route_pc >= base_addr + ls_ptr.size()) if (route_pc < base_addr || route_pc >= base_addr + ls_ptr.size())
{ {
return false; return is_range_limited;
} }
// Test the validity of a single instruction of the optional target // Test the validity of a single instruction of the optional target

View file

@ -843,7 +843,7 @@ public:
void set_events(u32 bits); void set_events(u32 bits);
void set_interrupt_status(bool enable); void set_interrupt_status(bool enable);
bool check_mfc_interrupts(u32 next_pc); bool check_mfc_interrupts(u32 next_pc);
static bool is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_addr = 0, bool avoid_dead_code = false); // Only a hint, do not rely on it other than debugging purposes static bool is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_addr = 0, bool avoid_dead_code = false, bool is_range_limited = false); // A hint, do not rely on it for true execution compatibility
static std::vector<u32> discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/); static std::vector<u32> discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/);
u32 get_ch_count(u32 ch); u32 get_ch_count(u32 ch);
s64 get_ch_value(u32 ch); s64 get_ch_value(u32 ch);