mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-03 13:31:27 +12:00
SPU: Function discovery fix
Do not detect branch to next.
This commit is contained in:
parent
a597368c46
commit
e851c044b5
2 changed files with 24 additions and 6 deletions
|
@ -2122,16 +2122,16 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||||
// Discover functions
|
// Discover functions
|
||||||
// Use the most simple method: search for instructions that calls them
|
// Use the most simple method: search for instructions that calls them
|
||||||
// And then filter invalid cases (does not detect tail calls)
|
// And then filter invalid cases (does not detect tail calls)
|
||||||
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62) : v128::from32p(umax);
|
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
|
||||||
|
|
||||||
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
|
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
|
||||||
{
|
{
|
||||||
// Search for BRSL and BRASL
|
// Search for BRSL LR and BRASL LR
|
||||||
// TODO: BISL
|
// TODO: BISL
|
||||||
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
||||||
const v128 shifted = gv_shr32(inst, 23);
|
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
|
||||||
const v128 eq_brsl = gv_eq32(shifted, v128::from32p(0x66));
|
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
|
||||||
const v128 eq_brasl = gv_eq32(shifted, brasl_mask);
|
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
|
||||||
const v128 result = eq_brsl | eq_brasl;
|
const v128 result = eq_brsl | eq_brasl;
|
||||||
|
|
||||||
if (!gv_testz(result))
|
if (!gv_testz(result))
|
||||||
|
@ -2160,7 +2160,7 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||||
|
|
||||||
const u32 func = op_branch_targets(addr, op)[0];
|
const u32 func = op_branch_targets(addr, op)[0];
|
||||||
|
|
||||||
if (func == umax || std::count(addrs.begin(), addrs.end(), func))
|
if (func == umax || addr + 4 == func || func == addr || std::count(addrs.begin(), addrs.end(), func))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2988,6 +2988,24 @@ inline v128 gv_rol32(const v128& a, const v128& b)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For each 32-bit element, r = rotate a by count
|
||||||
|
inline v128 gv_rol32(const v128& a, u32 count)
|
||||||
|
{
|
||||||
|
count %= 32;
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
return _mm_or_epi32(_mm_srli_epi32(a, 32 - count), _mm_slli_epi32(a, count));
|
||||||
|
#elif defined(ARCH_ARM64)
|
||||||
|
const auto amt1 = vdupq_n_s32(count);
|
||||||
|
const auto amt2 = vdupq_n_s32(count - 32);
|
||||||
|
return vorrq_u32(vshlq_u32(a, amt1), vshlq_u32(a, amt2));
|
||||||
|
#else
|
||||||
|
v128 r;
|
||||||
|
for (u32 i = 0; i < 4; i++)
|
||||||
|
r._u32[i] = utils::rol32(a._u32[i], count);
|
||||||
|
return r;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// For each 8-bit element, r = (a << (c & 7)) | (b >> (~c & 7) >> 1)
|
// For each 8-bit element, r = (a << (c & 7)) | (b >> (~c & 7) >> 1)
|
||||||
template <typename A, typename B, typename C>
|
template <typename A, typename B, typename C>
|
||||||
inline auto gv_fshl8(A&& a, B&& b, C&& c)
|
inline auto gv_fshl8(A&& a, B&& b, C&& c)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue