mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-09 08:21:29 +12:00
SPU ASMJIT: simplify patchpoints
Remove SPU thread reference from spu_recompiler_base. Disable support for far jumps in patchpoints (they were rare and unsafe).
This commit is contained in:
parent
1ca51a023c
commit
8f91917e8c
5 changed files with 43 additions and 91 deletions
|
@ -25,9 +25,9 @@ const spu_decoder<spu_recompiler> s_spu_decoder;
|
||||||
|
|
||||||
extern u64 get_timebased_time();
|
extern u64 get_timebased_time();
|
||||||
|
|
||||||
std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_asmjit_recompiler(SPUThread& spu)
|
std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_asmjit_recompiler()
|
||||||
{
|
{
|
||||||
return std::make_unique<spu_recompiler>(spu);
|
return std::make_unique<spu_recompiler>();
|
||||||
}
|
}
|
||||||
|
|
||||||
spu_runtime::spu_runtime()
|
spu_runtime::spu_runtime()
|
||||||
|
@ -44,8 +44,7 @@ spu_runtime::spu_runtime()
|
||||||
m_map[std::vector<u32>()] = &spu_recompiler_base::dispatch;
|
m_map[std::vector<u32>()] = &spu_recompiler_base::dispatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
spu_recompiler::spu_recompiler(SPUThread& spu)
|
spu_recompiler::spu_recompiler()
|
||||||
: spu_recompiler_base(spu)
|
|
||||||
{
|
{
|
||||||
if (!g_cfg.core.spu_shared_runtime)
|
if (!g_cfg.core.spu_shared_runtime)
|
||||||
{
|
{
|
||||||
|
@ -1115,15 +1114,18 @@ void spu_recompiler::branch_fixed(u32 target)
|
||||||
c->cmp(SPU_OFF_32(state), 0);
|
c->cmp(SPU_OFF_32(state), 0);
|
||||||
c->jz(local->second);
|
c->jz(local->second);
|
||||||
c->mov(SPU_OFF_32(pc), target);
|
c->mov(SPU_OFF_32(pc), target);
|
||||||
c->ret();
|
c->jmp(label_stop);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
|
||||||
{
|
|
||||||
// Don't generate patch points in this mode
|
|
||||||
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
|
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
|
||||||
c->mov(SPU_OFF_32(pc), target);
|
c->mov(SPU_OFF_32(pc), target);
|
||||||
|
c->cmp(SPU_OFF_32(state), 0);
|
||||||
|
c->jnz(label_stop);
|
||||||
|
|
||||||
|
if (false)
|
||||||
|
{
|
||||||
|
// Don't generate patch points (TODO)
|
||||||
c->xor_(qw0->r32(), qw0->r32());
|
c->xor_(qw0->r32(), qw0->r32());
|
||||||
c->jmp(x86::rax);
|
c->jmp(x86::rax);
|
||||||
return;
|
return;
|
||||||
|
@ -1132,40 +1134,17 @@ void spu_recompiler::branch_fixed(u32 target)
|
||||||
// Set patch address as a third argument and fallback to it
|
// Set patch address as a third argument and fallback to it
|
||||||
Label patch_point = c->newLabel();
|
Label patch_point = c->newLabel();
|
||||||
c->lea(*qw0, x86::qword_ptr(patch_point));
|
c->lea(*qw0, x86::qword_ptr(patch_point));
|
||||||
c->mov(SPU_OFF_32(pc), target);
|
|
||||||
|
|
||||||
// Need to emit exactly one executable instruction within 8 bytes
|
// Need to emit exactly one executable instruction within 8 bytes
|
||||||
c->align(kAlignCode, 8);
|
c->align(kAlignCode, 8);
|
||||||
c->bind(patch_point);
|
c->bind(patch_point);
|
||||||
|
//c->dq(0x841f0f);
|
||||||
const auto result = m_spurt->m_map.emplace(block(m_spu, target), nullptr);
|
|
||||||
|
|
||||||
if (result.second || !result.first->second)
|
|
||||||
{
|
|
||||||
if (result.first->first.size())
|
|
||||||
{
|
|
||||||
// Target block hasn't been compiled yet, record overwriting position
|
|
||||||
c->jmp(imm_ptr(&spu_recompiler_base::branch));
|
c->jmp(imm_ptr(&spu_recompiler_base::branch));
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// SPURS Workload entry point or similar thing (emit 8-byte NOP)
|
|
||||||
c->dq(0x841f0f);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
c->jmp(imm_ptr(result.first->second));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Branch via dispatcher (occupies 16 bytes including padding)
|
// Fallback to the branch via dispatcher
|
||||||
c->align(kAlignCode, 8);
|
c->align(kAlignCode, 8);
|
||||||
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
|
|
||||||
c->xor_(qw0->r32(), qw0->r32());
|
c->xor_(qw0->r32(), qw0->r32());
|
||||||
c->jmp(x86::rax);
|
c->jmp(x86::rax);
|
||||||
c->align(kAlignCode, 8);
|
|
||||||
c->dq(reinterpret_cast<u64>(&*result.first));
|
|
||||||
c->dq(reinterpret_cast<u64>(result.first->second));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void spu_recompiler::branch_indirect(spu_opcode_t op)
|
void spu_recompiler::branch_indirect(spu_opcode_t op)
|
||||||
|
|
|
@ -35,7 +35,7 @@ class spu_recompiler : public spu_recompiler_base
|
||||||
std::shared_ptr<spu_runtime> m_spurt;
|
std::shared_ptr<spu_runtime> m_spurt;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
spu_recompiler(class SPUThread& spu);
|
spu_recompiler();
|
||||||
|
|
||||||
virtual spu_function_t get(u32 lsa) override;
|
virtual spu_function_t get(u32 lsa) override;
|
||||||
|
|
||||||
|
|
|
@ -17,11 +17,8 @@ extern u64 get_system_time();
|
||||||
|
|
||||||
const spu_decoder<spu_itype> s_spu_itype;
|
const spu_decoder<spu_itype> s_spu_itype;
|
||||||
|
|
||||||
spu_recompiler_base::spu_recompiler_base(SPUThread& spu)
|
spu_recompiler_base::spu_recompiler_base()
|
||||||
: m_spu(spu)
|
|
||||||
{
|
{
|
||||||
// Initialize lookup table
|
|
||||||
spu.jit_dispatcher.fill(&dispatch);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
spu_recompiler_base::~spu_recompiler_base()
|
spu_recompiler_base::~spu_recompiler_base()
|
||||||
|
@ -30,7 +27,7 @@ spu_recompiler_base::~spu_recompiler_base()
|
||||||
|
|
||||||
void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
|
void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
|
||||||
{
|
{
|
||||||
// If check failed after direct branch, patch it with single NOP
|
// If code verification failed from a patched patchpoint, clear it with a single NOP
|
||||||
if (rip)
|
if (rip)
|
||||||
{
|
{
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
|
@ -63,16 +60,9 @@ void spu_recompiler_base::dispatch(SPUThread& spu, void*, u8* rip)
|
||||||
|
|
||||||
void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip)
|
void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip)
|
||||||
{
|
{
|
||||||
const auto pair = *reinterpret_cast<std::pair<const std::vector<u32>, spu_function_t>**>(rip + 24);
|
// Compile
|
||||||
|
const auto func = verify(HERE, spu.jit->compile(block(spu, spu.pc, &spu.jit->m_block_info)));
|
||||||
spu.pc = pair->first[0];
|
spu.jit_dispatcher[spu.pc / 4] = spu.jit->get(spu.pc);
|
||||||
|
|
||||||
const auto func = pair->second ? pair->second : spu.jit->compile(pair->first);
|
|
||||||
|
|
||||||
verify(HERE), func, pair->second == func;
|
|
||||||
|
|
||||||
// Overwrite function address
|
|
||||||
reinterpret_cast<atomic_t<spu_function_t>*>(rip + 32)->store(func);
|
|
||||||
|
|
||||||
// Overwrite jump to this function with jump to the compiled function
|
// Overwrite jump to this function with jump to the compiled function
|
||||||
const s64 rel = reinterpret_cast<u64>(func) - reinterpret_cast<u64>(rip) - 5;
|
const s64 rel = reinterpret_cast<u64>(func) - reinterpret_cast<u64>(rip) - 5;
|
||||||
|
@ -98,14 +88,11 @@ void spu_recompiler_base::branch(SPUThread& spu, void*, u8* rip)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bytes[0] = 0xff; // jmp [rip+26]
|
// Far jumps: extremely rare and disabled due to implementation complexity
|
||||||
bytes[1] = 0x25;
|
bytes[0] = 0x0f; // nop (8-byte form)
|
||||||
bytes[2] = 0x1a;
|
bytes[1] = 0x1f;
|
||||||
bytes[3] = 0x00;
|
bytes[2] = 0x84;
|
||||||
bytes[4] = 0x00;
|
std::memset(bytes + 3, 0x00, 5);
|
||||||
bytes[5] = 0x00;
|
|
||||||
bytes[6] = 0x90;
|
|
||||||
bytes[7] = 0x90;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
|
@ -200,7 +187,7 @@ std::vector<u32> spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset
|
||||||
//case spu_itype::DFCMGT:
|
//case spu_itype::DFCMGT:
|
||||||
case spu_itype::DFTSV:
|
case spu_itype::DFTSV:
|
||||||
{
|
{
|
||||||
// Stop on invalid instructions (TODO)
|
// Stop before invalid instructions (TODO)
|
||||||
blocks[pos / 4] = true;
|
blocks[pos / 4] = true;
|
||||||
next_block();
|
next_block();
|
||||||
continue;
|
continue;
|
||||||
|
@ -211,7 +198,7 @@ std::vector<u32> spu_recompiler_base::block(SPUThread& spu, u32 lsa, std::bitset
|
||||||
case spu_itype::STOP:
|
case spu_itype::STOP:
|
||||||
case spu_itype::STOPD:
|
case spu_itype::STOPD:
|
||||||
{
|
{
|
||||||
if (data == 0)
|
if (data == 0 || data == 0x80)
|
||||||
{
|
{
|
||||||
// Stop before null data
|
// Stop before null data
|
||||||
blocks[pos / 4] = true;
|
blocks[pos / 4] = true;
|
||||||
|
@ -751,8 +738,8 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
spu_llvm_recompiler(class SPUThread& spu)
|
spu_llvm_recompiler()
|
||||||
: spu_recompiler_base(spu)
|
: spu_recompiler_base()
|
||||||
, cpu_translator(nullptr, false)
|
, cpu_translator(nullptr, false)
|
||||||
{
|
{
|
||||||
if (g_cfg.core.spu_shared_runtime)
|
if (g_cfg.core.spu_shared_runtime)
|
||||||
|
@ -1035,21 +1022,6 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_cfg.core.spu_debug)
|
|
||||||
{
|
|
||||||
log += '\n';
|
|
||||||
|
|
||||||
for (u32 i = 0; i < 128; i++)
|
|
||||||
{
|
|
||||||
if (m_gpr[i].first)
|
|
||||||
{
|
|
||||||
fmt::append(log, "$% -3u = %s\n", i, m_spu.gpr[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log += '\n';
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make fallthrough if necessary
|
// Make fallthrough if necessary
|
||||||
if (!m_ir->GetInsertBlock()->getTerminator())
|
if (!m_ir->GetInsertBlock()->getTerminator())
|
||||||
{
|
{
|
||||||
|
@ -2827,9 +2799,9 @@ public:
|
||||||
static const spu_decoder<spu_llvm_recompiler> g_decoder;
|
static const spu_decoder<spu_llvm_recompiler> g_decoder;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_llvm_recompiler(SPUThread& spu)
|
std::unique_ptr<spu_recompiler_base> spu_recompiler_base::make_llvm_recompiler()
|
||||||
{
|
{
|
||||||
return std::make_unique<spu_llvm_recompiler>(spu);
|
return std::make_unique<spu_llvm_recompiler>();
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLARE(spu_llvm_recompiler::g_decoder);
|
DECLARE(spu_llvm_recompiler::g_decoder);
|
||||||
|
|
|
@ -7,15 +7,13 @@
|
||||||
class spu_recompiler_base
|
class spu_recompiler_base
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
SPUThread& m_spu;
|
|
||||||
|
|
||||||
u32 m_pos;
|
u32 m_pos;
|
||||||
u32 m_size;
|
u32 m_size;
|
||||||
|
|
||||||
std::bitset<0x10000> m_block_info;
|
std::bitset<0x10000> m_block_info;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
spu_recompiler_base(SPUThread& spu);
|
spu_recompiler_base();
|
||||||
|
|
||||||
virtual ~spu_recompiler_base();
|
virtual ~spu_recompiler_base();
|
||||||
|
|
||||||
|
@ -25,18 +23,18 @@ public:
|
||||||
// Compile function
|
// Compile function
|
||||||
virtual spu_function_t compile(const std::vector<u32>& func) = 0;
|
virtual spu_function_t compile(const std::vector<u32>& func) = 0;
|
||||||
|
|
||||||
// Default dispatch function fallback (second pointer is unused)
|
// Default dispatch function fallback (second arg is unused)
|
||||||
static void dispatch(SPUThread&, void*, u8*);
|
static void dispatch(SPUThread&, void*, u8* rip);
|
||||||
|
|
||||||
// Direct branch fallback for non-compiled destination
|
// Target for the unresolved patch point (second arg is unused)
|
||||||
static void branch(SPUThread&, void*, u8*);
|
static void branch(SPUThread&, void*, u8* rip);
|
||||||
|
|
||||||
// Get the block at specified address
|
// Get the block at specified address
|
||||||
static std::vector<u32> block(SPUThread&, u32 lsa, std::bitset<0x10000>* = nullptr);
|
static std::vector<u32> block(SPUThread&, u32 lsa, std::bitset<0x10000>* = nullptr);
|
||||||
|
|
||||||
// Create recompiler instance (ASMJIT)
|
// Create recompiler instance (ASMJIT)
|
||||||
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler(SPUThread& spu);
|
static std::unique_ptr<spu_recompiler_base> make_asmjit_recompiler();
|
||||||
|
|
||||||
// Create recompiler instance (LLVM)
|
// Create recompiler instance (LLVM)
|
||||||
static std::unique_ptr<spu_recompiler_base> make_llvm_recompiler(SPUThread& spu);
|
static std::unique_ptr<spu_recompiler_base> make_llvm_recompiler();
|
||||||
};
|
};
|
||||||
|
|
|
@ -531,13 +531,16 @@ SPUThread::SPUThread(const std::string& name, u32 index, lv2_spu_group* group)
|
||||||
{
|
{
|
||||||
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
|
if (g_cfg.core.spu_decoder == spu_decoder_type::asmjit)
|
||||||
{
|
{
|
||||||
jit = spu_recompiler_base::make_asmjit_recompiler(*this);
|
jit = spu_recompiler_base::make_asmjit_recompiler();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
|
if (g_cfg.core.spu_decoder == spu_decoder_type::llvm)
|
||||||
{
|
{
|
||||||
jit = spu_recompiler_base::make_llvm_recompiler(*this);
|
jit = spu_recompiler_base::make_llvm_recompiler();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize lookup table
|
||||||
|
jit_dispatcher.fill(&spu_recompiler_base::dispatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SPUThread::push_snr(u32 number, u32 value)
|
void SPUThread::push_snr(u32 number, u32 value)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue