mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-04 05:51:27 +12:00
Fix SPU Interpreter regression after #6147
This commit is contained in:
parent
1e2f81dd33
commit
cb5c26f2b5
3 changed files with 58 additions and 37 deletions
|
@ -24,6 +24,9 @@ const spu_decoder<spu_itype> s_spu_itype;
|
||||||
const spu_decoder<spu_iname> s_spu_iname;
|
const spu_decoder<spu_iname> s_spu_iname;
|
||||||
const spu_decoder<spu_iflag> s_spu_iflag;
|
const spu_decoder<spu_iflag> s_spu_iflag;
|
||||||
|
|
||||||
|
extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise;
|
||||||
|
extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast;
|
||||||
|
|
||||||
extern u64 get_timebased_time();
|
extern u64 get_timebased_time();
|
||||||
|
|
||||||
// Move 4 args for calling native function from a GHC calling convention function
|
// Move 4 args for calling native function from a GHC calling convention function
|
||||||
|
@ -74,6 +77,18 @@ DECLARE(spu_runtime::tr_branch) = []
|
||||||
return reinterpret_cast<spu_function_t>(trptr);
|
return reinterpret_cast<spu_function_t>(trptr);
|
||||||
}();
|
}();
|
||||||
|
|
||||||
|
// Trampoline to the legacy interpreter: a small stub, built once by this
// immediately-invoked lambda, that ends in an indirect jump to
// spu_recompiler_base::old_interpreter.
DECLARE(spu_runtime::tr_interpreter) = []
|
||||||
|
{
|
||||||
|
// Reserve 32 bytes from the JIT runtime for the stub
// (second argument is presumably the alignment - TODO confirm against jit_runtime::alloc)
u8* const trptr = jit_runtime::alloc(32, 16);
|
||||||
|
// Emit the GHC -> native argument moves first; the returned pointer is used
// below as the write cursor (presumably it points just past the emitted code)
u8* raw = move_args_ghc_to_native(trptr);
|
||||||
|
*raw++ = 0xff; // jmp [rip]
|
||||||
|
*raw++ = 0x25;
|
||||||
|
// Zero 32-bit displacement: the jump reads its 8-byte target from the bytes
// immediately following the instruction
std::memset(raw, 0, 4);
|
||||||
|
const u64 target = reinterpret_cast<u64>(&spu_recompiler_base::old_interpreter);
|
||||||
|
// Store the absolute target address right after the displacement field
std::memcpy(raw + 4, &target, 8);
|
||||||
|
// The start of the stub is what gets exposed as the callable trampoline
return reinterpret_cast<spu_function_t>(trptr);
|
||||||
|
}();
|
||||||
|
|
||||||
DECLARE(spu_runtime::g_dispatcher) = []
|
DECLARE(spu_runtime::g_dispatcher) = []
|
||||||
{
|
{
|
||||||
const auto ptr = reinterpret_cast<decltype(spu_runtime::g_dispatcher)>(jit_runtime::alloc(sizeof(spu_function_t), 8, false));
|
const auto ptr = reinterpret_cast<decltype(spu_runtime::g_dispatcher)>(jit_runtime::alloc(sizeof(spu_function_t), 8, false));
|
||||||
|
@ -281,7 +296,8 @@ void spu_cache::add(const std::vector<u32>& func)
|
||||||
|
|
||||||
void spu_cache::initialize()
|
void spu_cache::initialize()
|
||||||
{
|
{
|
||||||
spu_runtime::g_interpreter = nullptr;
|
spu_runtime::g_interpreter = spu_runtime::g_gateway;
|
||||||
|
*spu_runtime::g_dispatcher = spu_runtime::tr_interpreter;
|
||||||
|
|
||||||
const std::string ppu_cache = Emu.PPUCache();
|
const std::string ppu_cache = Emu.PPUCache();
|
||||||
|
|
||||||
|
@ -1145,6 +1161,37 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
|
||||||
atomic_storage<u64>::release(*reinterpret_cast<u64*>(rip), result);
|
atomic_storage<u64>::release(*reinterpret_cast<u64*>(rip), result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Legacy interpreter loop.
// spu - thread whose state, pc and registers are driven by the opcode handlers
// ls  - pointer used as the base of local storage for instruction fetch
// rip - not referenced in this function
// The whole body is a function-try-block: any std::exception escaping the loop
// is handled by the catch clause at the bottom and the function then returns.
void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* rip) try
|
||||||
|
{
|
||||||
|
// Select opcode table
// (precise or fast according to g_cfg.core.spu_decoder; any other setting throws std::logic_error)
const auto& table = *(
|
||||||
|
g_cfg.core.spu_decoder == spu_decoder_type::precise ? &g_spu_interpreter_precise.get_table() :
|
||||||
|
g_cfg.core.spu_decoder == spu_decoder_type::fast ? &g_spu_interpreter_fast.get_table() :
|
||||||
|
(fmt::throw_exception<std::logic_error>("Invalid SPU decoder"), nullptr));
|
||||||
|
|
||||||
|
// LS pointer
const auto base = static_cast<const u8*>(ls);
|
||||||
|
|
||||||
|
// Fetch/dispatch loop; the only exits are the break below or an exception
while (true)
|
||||||
|
{
|
||||||
|
// Only call check_state() when some state flag is set
if (UNLIKELY(spu.state))
|
||||||
|
{
|
||||||
|
// A true result from check_state() ends the interpreter loop
if (spu.check_state())
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the 32-bit big-endian instruction word at the current pc
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + spu.pc);
|
||||||
|
// Dispatch to the handler for this opcode; pc is advanced here only when the
// handler returns true (presumably a false return means the handler set pc itself - TODO confirm)
if (table[spu_decode(op)](spu, {op}))
|
||||||
|
spu.pc += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Handler for exceptions thrown anywhere in the body above:
// pause emulation, log the exception type and message, then log the thread dump
catch (const std::exception& e)
|
||||||
|
{
|
||||||
|
Emu.Pause();
|
||||||
|
LOG_FATAL(GENERAL, "%s thrown: %s", typeid(e).name(), e.what());
|
||||||
|
LOG_NOTICE(GENERAL, "\n%s", spu.dump());
|
||||||
|
}
|
||||||
|
|
||||||
const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
|
const std::vector<u32>& spu_recompiler_base::analyse(const be_t<u32>* ls, u32 entry_point)
|
||||||
{
|
{
|
||||||
// Result: addr + raw instruction data
|
// Result: addr + raw instruction data
|
||||||
|
@ -4713,8 +4760,6 @@ public:
|
||||||
|
|
||||||
static void interp_check(spu_thread* _spu, bool after)
|
static void interp_check(spu_thread* _spu, bool after)
|
||||||
{
|
{
|
||||||
static const spu_decoder<spu_interpreter_fast> s_dec;
|
|
||||||
|
|
||||||
static thread_local std::array<v128, 128> s_gpr;
|
static thread_local std::array<v128, 128> s_gpr;
|
||||||
|
|
||||||
if (!after)
|
if (!after)
|
||||||
|
@ -4724,7 +4769,7 @@ public:
|
||||||
|
|
||||||
// Execute interpreter instruction
|
// Execute interpreter instruction
|
||||||
const u32 op = *reinterpret_cast<const be_t<u32>*>(_spu->_ptr<u8>(0) + _spu->pc);
|
const u32 op = *reinterpret_cast<const be_t<u32>*>(_spu->_ptr<u8>(0) + _spu->pc);
|
||||||
if (!s_dec.decode(op)(*_spu, {op}))
|
if (!g_spu_interpreter_fast.decode(op)(*_spu, {op}))
|
||||||
LOG_FATAL(SPU, "Bad instruction" HERE);
|
LOG_FATAL(SPU, "Bad instruction" HERE);
|
||||||
|
|
||||||
// Swap state
|
// Swap state
|
||||||
|
|
|
@ -70,6 +70,9 @@ public:
|
||||||
// Trampoline to spu_recompiler_base::branch
|
// Trampoline to spu_recompiler_base::branch
|
||||||
static const spu_function_t tr_branch;
|
static const spu_function_t tr_branch;
|
||||||
|
|
||||||
|
// Trampoline to legacy interpreter
|
||||||
|
static const spu_function_t tr_interpreter;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
spu_runtime();
|
spu_runtime();
|
||||||
|
|
||||||
|
@ -356,6 +359,9 @@ public:
|
||||||
// Target for the unresolved patch point (second arg is unused)
|
// Target for the unresolved patch point (second arg is unused)
|
||||||
static void branch(spu_thread&, void*, u8* rip);
|
static void branch(spu_thread&, void*, u8* rip);
|
||||||
|
|
||||||
|
// Legacy interpreter loop
|
||||||
|
static void old_interpreter(spu_thread&, void* ls, u8*);
|
||||||
|
|
||||||
// Get the function data at specified address
|
// Get the function data at specified address
|
||||||
const std::vector<u32>& analyse(const be_t<u32>* ls, u32 lsa);
|
const std::vector<u32>& analyse(const be_t<u32>* ls, u32 lsa);
|
||||||
|
|
||||||
|
|
|
@ -71,10 +71,6 @@ static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const declt
|
||||||
extern u64 get_timebased_time();
|
extern u64 get_timebased_time();
|
||||||
extern u64 get_system_time();
|
extern u64 get_system_time();
|
||||||
|
|
||||||
extern const spu_decoder<spu_interpreter_precise> g_spu_interpreter_precise;
|
|
||||||
|
|
||||||
extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast;
|
|
||||||
|
|
||||||
extern thread_local u64 g_tls_fault_spu;
|
extern thread_local u64 g_tls_fault_spu;
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
@ -1156,12 +1152,11 @@ void spu_thread::cpu_task()
|
||||||
|
|
||||||
// Print some stats
|
// Print some stats
|
||||||
LOG_NOTICE(SPU, "Stats: Block Weight: %u (Retreats: %u);", block_counter, block_failure);
|
LOG_NOTICE(SPU, "Stats: Block Weight: %u (Retreats: %u);", block_counter, block_failure);
|
||||||
cpu_stop();
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
if (spu_runtime::g_interpreter)
|
|
||||||
{
|
{
|
||||||
|
ASSERT(spu_runtime::g_interpreter);
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
if (UNLIKELY(state))
|
if (UNLIKELY(state))
|
||||||
|
@ -1172,31 +1167,6 @@ void spu_thread::cpu_task()
|
||||||
|
|
||||||
spu_runtime::g_interpreter(*this, vm::_ptr<u8>(offset), nullptr);
|
spu_runtime::g_interpreter(*this, vm::_ptr<u8>(offset), nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
cpu_stop();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Select opcode table
|
|
||||||
const auto& table = *(
|
|
||||||
g_cfg.core.spu_decoder == spu_decoder_type::precise ? &g_spu_interpreter_precise.get_table() :
|
|
||||||
g_cfg.core.spu_decoder == spu_decoder_type::fast ? &g_spu_interpreter_fast.get_table() :
|
|
||||||
(fmt::throw_exception<std::logic_error>("Invalid SPU decoder"), nullptr));
|
|
||||||
|
|
||||||
// LS pointer
|
|
||||||
const auto base = vm::_ptr<const u8>(offset);
|
|
||||||
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
if (UNLIKELY(state))
|
|
||||||
{
|
|
||||||
if (check_state())
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 op = *reinterpret_cast<const be_t<u32>*>(base + pc);
|
|
||||||
if (table[spu_decode(op)](*this, {op}))
|
|
||||||
pc += 4;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cpu_stop();
|
cpu_stop();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue