diff --git a/rpcs3/Emu/Cell/PPUModule.cpp b/rpcs3/Emu/Cell/PPUModule.cpp index 59d5b1997f..799789af63 100644 --- a/rpcs3/Emu/Cell/PPUModule.cpp +++ b/rpcs3/Emu/Cell/PPUModule.cpp @@ -1171,7 +1171,9 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu auto find_first_of_multiple = [](std::string_view data, std::initializer_list values, usz index) { - u32 pos = static_cast(data.size()); + u32 pos = umax; + + ensure(data.size() <= pos && index <= data.size()); for (std::string_view value : values) { @@ -1191,65 +1193,74 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu u32 prev_bound = 0; - for (u32 i = find_first_of_multiple(seg_view, prefixes, 0); i < seg.size; i = find_first_of_multiple(seg_view, prefixes, i + 4)) + for (u32 prefix_addr = find_first_of_multiple(seg_view, prefixes, 0); prefix_addr < seg.size; prefix_addr = find_first_of_multiple(seg_view, prefixes, prefix_addr + 4)) { - const auto elf_header = ensure(mod.get_ptr(seg.addr + i)); + const auto elf_header = ensure(mod.get_ptr(seg.addr + prefix_addr)); if (std::memcmp(elf_header, "\x24\0\x40\x80", 4) == 0) { - bool next = true; - const u32 old_i = i; - u32 guid_start = umax, guid_end = umax; + const u32 old_prefix_addr = prefix_addr; - for (u32 search = i & -128, tries = 10; tries && search >= prev_bound; tries = search == 0 ? 0 : tries - 1, search = utils::sub_saturate(search, 128)) + auto search_guid_pattern = [&](u32 index, std::string_view data_span, s32 advance_index, u32 lower_bound, u32 uppper_bound) -> u32 { - if (seg_view[search] != 0x42 && seg_view[search] != 0x43) + for (u32 search = index & -16, tries = 16 * 64; tries && search >= lower_bound && search < uppper_bound; tries = tries - 1, search = advance_index < 0 ? utils::sub_saturate(search, 0 - advance_index) : search + advance_index) { - continue; + if (seg_view[search] != 0x42 && seg_view[search] != 0x43) + { + continue; + } + + const u32 inst1 = read_from_ptr>(data_span, search); + const u32 inst2 = read_from_ptr>(data_span, search + 4); + const u32 inst3 = read_from_ptr>(data_span, search + 8); + const u32 inst4 = read_from_ptr>(data_span, search + 12); + + if ((inst1 & 0xfe'00'00'7f) != 0x42000002 || (inst2 & 0xfe'00'00'7f) != 0x42000002 || (inst3 & 0xfe'00'00'7f) != 0x42000002 || (inst4 & 0xfe'00'00'7f) != 0x42000002) + { + continue; + } + + if (!spu_thread::is_exec_code(search, {reinterpret_cast(data_span.data()), data_span.size()}, 0, true, true)) + { + continue; + } + + return search; } - const u32 inst1 = read_from_ptr>(seg_view, search); - const u32 inst2 = read_from_ptr>(seg_view, search + 4); - const u32 inst3 = read_from_ptr>(seg_view, search + 8); - const u32 inst4 = read_from_ptr>(seg_view, search + 12); + return umax; + }; - if ((inst1 & 0xfe'00'00'7f) != 0x42000002 || (inst2 & 0xfe'00'00'7f) != 0x42000002 || (inst3 & 0xfe'00'00'7f) != 0x42000002 || (inst4 & 0xfe'00'00'7f) != 0x42000002) - { - continue; - } + prefix_addr = search_guid_pattern(prefix_addr, seg_view, -16, prev_bound, seg.size); - guid_start = search + seg.addr; - i = search; - next = false; - break; - } - - if (next) + if (prefix_addr == umax) { + prefix_addr = old_prefix_addr; continue; } - std::string_view ls_segment = seg_view.substr(i); + u32 guid_start = seg.addr + prefix_addr, guid_end = umax; + + std::string_view ls_segment = seg_view.substr(prefix_addr); // Bound to a bit less than LS size ls_segment = ls_segment.substr(0, SPU_LS_SIZE - 0x8000); for (u32 addr_last = 0, valid_count = 0, invalid_count = 0;;) { - const u32 instruction = find_first_of_multiple(ls_segment, prefixes, addr_last); + const u32 instruction = std::min(search_guid_pattern(addr_last, ls_segment, +16, 0, ::size32(ls_segment)), find_first_of_multiple(ls_segment, prefixes, addr_last)); if (instruction != umax && std::memcmp(ls_segment.data() + instruction, "\x24\0\x40\x80", 4) == 0) { - if (instruction % 4 != i % 4) + if (instruction % 4 != prefix_addr % 4) { // Unaligned, continue - addr_last = instruction + (i % 4 - instruction % 4) % 4; + addr_last = instruction + (prefix_addr % 4 - instruction % 4) % 4; continue; } - // FIXME: This seems to terminate SPU code prematurely in some cases - // Likely due to absolute branches - if (spu_thread::is_exec_code(instruction, {reinterpret_cast(ls_segment.data()), ls_segment.size()}, 0)) + // Check execution compatibility + if (spu_thread::is_exec_code(instruction, {reinterpret_cast(ls_segment.data()), ls_segment.size()}, 0, true, true)) { addr_last = instruction + 4; valid_count++; @@ -1270,7 +1281,7 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu if (addr_last >= 0x80 && valid_count >= 2) { - u32 end = std::min({instruction, seg.size - i, utils::align(addr_last + 256, 128)}); + u32 end = std::min({instruction, seg.size - prefix_addr, utils::align(addr_last + 256, 128)}); u32 guessed_ls_addr = 0; @@ -1320,27 +1331,27 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu if (guessed_ls_addr) { - end = i + std::min(end, SPU_LS_SIZE - guessed_ls_addr); + end = prefix_addr + std::min(end, SPU_LS_SIZE - guessed_ls_addr); } else { - end = i + std::min(end, SPU_LS_SIZE); + end = prefix_addr + std::min(end, SPU_LS_SIZE); } - ppu_log.success("Found valid roaming SPU code at 0x%x..0x%x (guessed_ls_addr=0x%x, GUID=0x%05x..0x%05x)", seg.addr + i, seg.addr + end, guessed_ls_addr, guid_start, guid_end); + ppu_log.success("Found valid roaming SPU code at 0x%x..0x%x (guessed_ls_addr=0x%x, GUID=0x%05x..0x%05x)", seg.addr + prefix_addr, seg.addr + end, guessed_ls_addr, guid_start, guid_end); if (!is_firmware && _main == &mod) { // Siginify that the base address is unknown by passing 0 - utilize_spu_data_segment(guessed_ls_addr ? guessed_ls_addr : 0x4000, seg_view.data() + i, end - i); + utilize_spu_data_segment(guessed_ls_addr ? guessed_ls_addr : 0x4000, seg_view.data() + prefix_addr, end - prefix_addr); } - i = std::max(end, i + 4) - 4; - prev_bound = i + 4; + prefix_addr = std::max(end, prefix_addr + 4) - 4; + prev_bound = prefix_addr + 4; } else { - i = old_i; + prefix_addr = old_prefix_addr; } break; @@ -1350,7 +1361,7 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu } // Try to load SPU image - const spu_exec_object obj(fs::file(elf_header, seg.size - i)); + const spu_exec_object obj(fs::file(elf_header, seg.size - prefix_addr)); if (obj != elf_error::ok) { @@ -1395,7 +1406,7 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu sha1_update(&sha2, (elf_header + prog.p_offset), prog.p_filesz); // We assume that the string SPUNAME exists 0x14 bytes into the NOTE segment - name = ensure(mod.get_ptr(seg.addr + i + prog.p_offset + 0x14)); + name = ensure(mod.get_ptr(seg.addr + prefix_addr + prog.p_offset + 0x14)); if (!name.empty()) { @@ -1404,7 +1415,7 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu } } - fmt::append(dump, " (image addr: 0x%x, size: 0x%x)", seg.addr + i, obj.highest_offset); + fmt::append(dump, " (image addr: 0x%x, size: 0x%x)", seg.addr + prefix_addr, obj.highest_offset); sha1_finish(&sha2, sha1_hash); @@ -1454,8 +1465,8 @@ static void ppu_check_patch_spu_images(const ppu_module& mod, const ppu ppu_loader.success("SPU executable hash: %s (<- %u)%s", hash, applied.size(), dump); } - i += ::narrow(obj.highest_offset - 4); - prev_bound = i + 4; + prefix_addr += ::narrow(obj.highest_offset - 4); + prev_bound = prefix_addr + 4; } } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 1d7c0d7f29..589961eb92 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -4327,7 +4327,7 @@ bool spu_thread::check_mfc_interrupts(u32 next_pc) return false; } -bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_addr, bool avoid_dead_code) +bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_addr, bool avoid_dead_code, bool is_range_limited) { bool had_conditional = false; @@ -4335,12 +4335,12 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add { if (addr & ~0x3FFFC) { - return false; + return is_range_limited; } if (addr < base_addr || addr >= base_addr + ls_ptr.size()) { - return false; + return is_range_limited; } const u32 addr0 = spu_branch_target(addr); @@ -4450,12 +4450,12 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add { if (addr < 0u - rel) { - return false; + return is_range_limited; } } else if (SPU_LS_SIZE - addr <= rel + 0u) { - return false; + return is_range_limited; } if (type == spu_itype::BRSL) @@ -4485,7 +4485,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add if (route_pc < base_addr || route_pc >= base_addr + ls_ptr.size()) { - return false; + return is_range_limited; } // Test the validity of a single instruction of the optional target diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 37cf1cf074..a5495899f2 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -843,7 +843,7 @@ public: void set_events(u32 bits); void set_interrupt_status(bool enable); bool check_mfc_interrupts(u32 next_pc); - static bool is_exec_code(u32 addr, std::span ls_ptr, u32 base_addr = 0, bool avoid_dead_code = false); // Only a hint, do not rely on it other than debugging purposes + static bool is_exec_code(u32 addr, std::span ls_ptr, u32 base_addr = 0, bool avoid_dead_code = false, bool is_range_limited = false); // A hint, do not rely on it for true execution compatibility static std::vector discover_functions(u32 base_addr, std::span ls, bool is_known_addr, u32 /*entry*/); u32 get_ch_count(u32 ch); s64 get_ch_value(u32 ch);