SPU ASMJIT: internal jumptable

Allow indirect calls within the current function using a jumptable
This restores some functionality removed in SPU ASMJIT 2.0
Change SPUThread::get_ch_value prototype
Nekotekina 2018-04-30 19:39:06 +03:00
parent df453d6d4f
commit 767dfa271e
7 changed files with 302 additions and 229 deletions
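
Note on the `SPUThread::get_ch_value` prototype change: the old bool-plus-output-parameter form is replaced by a single `s64` return value, where a negative result means the SPU thread was stopped while waiting and the low 32 bits otherwise hold the channel value. A minimal sketch of the new calling convention follows (hypothetical caller for illustration only, not part of this commit; `SPUThread` and `MFC_RdTagStat` come from the existing SPU headers):

```cpp
// Old: bool SPUThread::get_ch_value(u32 ch, u32& out) -> false when the thread must stop
// New: s64  SPUThread::get_ch_value(u32 ch)           -> negative when the thread must stop,
//                                                        otherwise the 32-bit channel value
bool try_read_tag_stat(SPUThread& spu, u32& value)
{
	const s64 result = spu.get_ch_value(MFC_RdTagStat);

	if (result < 0)
	{
		// Stopped while waiting: abort, as spu_interpreter::RDCH now does
		return false;
	}

	value = static_cast<u32>(result);
	return true;
}
```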

View file

@@ -133,12 +133,8 @@ bool spursDma(SPUThread& spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag)
     if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD)
     {
-        u32 rv;
-        spu.get_ch_value(MFC_RdAtomicStat, rv);
-        auto success = rv ? true : false;
-        success = cmd == MFC_PUTLLC_CMD ? !success : success;
-        return success;
+        const u32 rv = spu.get_ch_value(MFC_RdAtomicStat);
+        return cmd == MFC_PUTLLC_CMD ? !rv : true;
     }

     return true;
@@ -149,7 +145,7 @@ u32 spursDmaGetCompletionStatus(SPUThread& spu, u32 tagMask)
 {
     spu.set_ch_value(MFC_WrTagMask, tagMask);
     spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_IMMEDIATE);
-    u32 rv; spu.get_ch_value(MFC_RdTagStat, rv); return rv;
+    return spu.get_ch_value(MFC_RdTagStat);
 }

 // Wait for DMA operations to complete
@@ -157,7 +153,7 @@ u32 spursDmaWaitForCompletion(SPUThread& spu, u32 tagMask, bool waitForAll)
 {
     spu.set_ch_value(MFC_WrTagMask, tagMask);
     spu.set_ch_value(MFC_WrTagUpdate, waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY);
-    u32 rv; spu.get_ch_value(MFC_RdTagStat, rv); return rv;
+    return spu.get_ch_value(MFC_RdTagStat);
 }

 // Halt the SPU
@@ -168,16 +164,15 @@ void spursHalt(SPUThread& spu)
 void sys_spu_thread_exit(SPUThread& spu, s32 status)
 {
-    u32 _v;
     // Cancel any pending status update requests
     spu.set_ch_value(MFC_WrTagUpdate, 0);
     while (spu.get_ch_count(MFC_RdTagStat) != 1);
-    spu.get_ch_value(MFC_RdTagStat, _v);
+    spu.get_ch_value(MFC_RdTagStat);

     // Wait for all pending DMA operations to complete
     spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF);
     spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL);
-    spu.get_ch_value(MFC_RdTagStat, _v);
+    spu.get_ch_value(MFC_RdTagStat);

     spu.set_ch_value(SPU_WrOutMbox, status);
     spu.stop_and_signal(0x102);
@@ -185,16 +180,15 @@ void sys_spu_thread_exit(SPUThread& spu, s32 status)
 void sys_spu_thread_group_exit(SPUThread& spu, s32 status)
 {
-    u32 _v;
     // Cancel any pending status update requests
     spu.set_ch_value(MFC_WrTagUpdate, 0);
     while (spu.get_ch_count(MFC_RdTagStat) != 1);
-    spu.get_ch_value(MFC_RdTagStat, _v);
+    spu.get_ch_value(MFC_RdTagStat);

     // Wait for all pending DMA operations to complete
     spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF);
     spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL);
-    spu.get_ch_value(MFC_RdTagStat, _v);
+    spu.get_ch_value(MFC_RdTagStat);

     spu.set_ch_value(SPU_WrOutMbox, status);
     spu.stop_and_signal(0x101);
@@ -214,9 +208,7 @@ s32 sys_spu_thread_send_event(SPUThread& spu, u8 spup, u32 data0, u32 data1)
     spu.set_ch_value(SPU_WrOutMbox, data1);
     spu.set_ch_value(SPU_WrOutIntrMbox, (spup << 24) | (data0 & 0x00FFFFFF));
-    spu.get_ch_value(SPU_RdInMbox, data0);
-    return data0;
+    return static_cast<u32>(spu.get_ch_value(SPU_RdInMbox));
 }

 s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status)
@@ -231,18 +223,18 @@ s32 sys_spu_thread_switch_system_module(SPUThread& spu, u32 status)
     // Cancel any pending status update requests
     spu.set_ch_value(MFC_WrTagUpdate, 0);
     while (spu.get_ch_count(MFC_RdTagStat) != 1);
-    spu.get_ch_value(MFC_RdTagStat, result);
+    spu.get_ch_value(MFC_RdTagStat);

     // Wait for all pending DMA operations to complete
     spu.set_ch_value(MFC_WrTagMask, 0xFFFFFFFF);
     spu.set_ch_value(MFC_WrTagUpdate, MFC_TAG_UPDATE_ALL);
-    spu.get_ch_value(MFC_RdTagStat, result);
+    spu.get_ch_value(MFC_RdTagStat);

     do
     {
         spu.set_ch_value(SPU_WrOutMbox, status);
         spu.stop_and_signal(0x120);
-        spu.get_ch_value(SPU_RdInMbox, result);
+        result = spu.get_ch_value(SPU_RdInMbox);
     }
     while (result == CELL_EBUSY);
@@ -1719,9 +1711,8 @@ s32 spursTasketSaveTaskContext(SPUThread& spu)
     v128 r;
     spu.fpscr.Read(r);
     ctxt->savedContextFpscr = r;
-    u32 r32;
-    spu.get_ch_value(SPU_RdEventMask, r32); ctxt->savedSpuWriteEventMask = r32;
-    spu.get_ch_value(MFC_RdTagMask, r32); ctxt->savedWriteTagGroupQueryMask = r32;
+    ctxt->savedSpuWriteEventMask = spu.get_ch_value(SPU_RdEventMask);
+    ctxt->savedWriteTagGroupQueryMask = spu.get_ch_value(MFC_RdTagMask);

     // Store the processor context
     const u32 contextSaveStorage = vm::cast(taskInfo->context_save_storage_and_alloc_ls_blocks & -0x80, HERE);

View file

@@ -152,7 +152,7 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
         vec[i] = vec_vars[i];
     }

-    Label label_stop = c->newLabel();
+    label_stop = c->newLabel();
     Label label_diff = c->newLabel();
     Label label_code = c->newLabel();
     std::vector<u32> words;
@@ -163,6 +163,15 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
     const u32 start = m_pos;
     const u32 end = m_pos + (func.size() - 1) * 4;

+    // Create instruction labels (TODO: some of them are unnecessary)
+    for (u32 i = 1; i < func.size(); i++)
+    {
+        if (func[i])
+        {
+            instr_labels[i * 4 - 4 + m_pos] = c->newLabel();
+        }
+    }
+
     // Set PC and check status
     c->mov(SPU_OFF_32(pc), m_pos);
     c->cmp(SPU_OFF_32(state), 0);
@@ -713,6 +722,14 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
         // Update position
         m_pos = pos;

+        // Bind instruction label if necessary
+        const auto found = instr_labels.find(pos);
+
+        if (found != instr_labels.end())
+        {
+            c->bind(found->second);
+        }
+
         // Execute recompiler function
         (this->*s_spu_decoder.decode(op))({op});
@@ -750,6 +767,27 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
         work();
     }

+    // Build instruction dispatch table
+    if (instr_table.isValid())
+    {
+        c->align(kAlignData, 8);
+        c->bind(instr_table);
+
+        for (u32 addr = start; addr < end; addr += 4)
+        {
+            const auto found = instr_labels.find(addr);
+
+            if (found != instr_labels.end())
+            {
+                c->embedLabel(found->second);
+            }
+            else
+            {
+                c->embedLabel(label_stop);
+            }
+        }
+    }
+
     c->align(kAlignData, words_align);
     c->bind(label_code);
     for (u32 d : words)
@@ -760,6 +798,9 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
         work();
     }

+    label_stop.reset();
+    instr_table.reset();
+    instr_labels.clear();
     xmm_consts.clear();

     // Compile and get function address
@@ -1066,6 +1107,28 @@ void spu_recompiler::branch_fixed(u32 target)
 {
     using namespace asmjit;

+    // Check local branch
+    const auto local = instr_labels.find(target);
+
+    if (local != instr_labels.end() && local->second.isValid())
+    {
+        c->cmp(SPU_OFF_32(state), 0);
+        c->jz(local->second);
+        c->mov(SPU_OFF_32(pc), target);
+        c->ret();
+        return;
+    }
+
+    if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
+    {
+        // Don't generate patch points in this mode
+        c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
+        c->mov(SPU_OFF_32(pc), target);
+        c->xor_(qw0->r32(), qw0->r32());
+        c->jmp(x86::rax);
+        return;
+    }
+
     // Set patch address as a third argument and fallback to it
     Label patch_point = c->newLabel();
     c->lea(*qw0, x86::qword_ptr(patch_point));
@@ -1109,9 +1172,23 @@ void spu_recompiler::branch_indirect(spu_opcode_t op)
 {
     using namespace asmjit;

-    // Load indirect jump address
-    c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
+    if (!instr_table.isValid())
+    {
+        // Request instruction table
+        instr_table = c->newLabel();
+    }
+
+    const u32 start = instr_labels.begin()->first;
+    const u32 end = instr_labels.rbegin()->first + 4;
+
+    // Load indirect jump address, choose between local and external
+    c->lea(x86::r10, x86::qword_ptr(instr_table));
+    c->lea(*qw1, x86::qword_ptr(*addr, 0 - start));
     c->xor_(qw0->r32(), qw0->r32());
+    c->cmp(qw1->r32(), end - start);
+    c->cmovae(qw1->r32(), qw0->r32());
+    c->cmovb(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
+    c->cmovae(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));

     if (op.d)
     {
@@ -1119,22 +1196,34 @@ void spu_recompiler::branch_indirect(spu_opcode_t op)
     }
     else if (op.e)
     {
-        c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
-        c->mov(x86::r9d, SPU_OFF_32(ch_event_stat));
-        c->and_(x86::r9d, SPU_OFF_32(ch_event_mask));
-        c->and_(x86::r9d, SPU_EVENT_INTR_TEST);
-        c->cmp(x86::r9d, 0);
-        Label noInterrupt = c->newLabel();
-        c->je(noInterrupt);
+        Label no_intr = c->newLabel();
+        Label intr = c->newLabel();
+        Label fail = c->newLabel();
+
+        c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
+        c->mov(qw1->r32(), SPU_OFF_32(ch_event_mask));
+        c->test(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED);
+        c->jnz(fail);
+        c->and_(qw1->r32(), SPU_OFF_32(ch_event_stat));
+        c->test(qw1->r32(), SPU_EVENT_INTR_IMPLEMENTED);
+        c->jnz(intr);
+        c->jmp(no_intr);
+
+        c->bind(fail);
+        c->mov(SPU_OFF_32(pc), *addr);
+        c->mov(addr->r64(), reinterpret_cast<u64>(vm::base(0xffdead00)));
+        c->mov(asmjit::x86::dword_ptr(addr->r64()), "INTR"_u32);
+
+        c->bind(intr);
         c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
         c->mov(SPU_OFF_32(srr0), *addr);
-        branch_fixed(0);
+        c->mov(*addr, qw0->r32());
+        c->mov(x86::r10, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher)));
         c->align(kAlignCode, 16);
-        c->bind(noInterrupt);
+        c->bind(no_intr);
     }

     c->mov(SPU_OFF_32(pc), *addr);
+    c->cmp(SPU_OFF_32(state), 0);
+    c->jnz(label_stop);
     c->jmp(x86::r10);
 }
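
The hunks above are the core of the internal jumptable: `compile()` records a label per compiled instruction and emits one 8-byte entry per 4-byte slot at `instr_table` (empty slots point at `label_stop`), while `branch_indirect()` bounds-checks the target against `[start, end)` before choosing between that table and the global `jit_dispatcher`. A minimal C++ sketch of the emitted dispatch logic follows (illustration only, not code from the commit; `table` and `dispatcher` are hypothetical stand-ins for the emitted data and `SPUThread::jit_dispatcher`):

```cpp
#include <cstdint>

using spu_function_t = void (*)();

// Illustrates what the cmp/cmovb/cmovae sequence emitted by branch_indirect() computes.
// 'table' holds one code pointer per 4-byte instruction slot in [start, end).
spu_function_t resolve_branch(std::uint32_t target, std::uint32_t start, std::uint32_t end,
	const spu_function_t* table, const spu_function_t* dispatcher)
{
	const std::uint32_t offset = target - start;

	if (offset < end - start)
	{
		// Local target: use the per-function jumptable (label_stop if the slot is empty)
		return table[offset / 4];
	}

	// External target: fall back to the global dispatcher
	return dispatcher[target / 4];
}
```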
@@ -1348,25 +1437,33 @@ void spu_recompiler::MFSPR(spu_opcode_t op)
     c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
 }

+static void spu_rdch_ret(SPUThread& spu, void*, u32)
+{
+    // MSVC workaround (TCO)
+}
+
+static void spu_rdch(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32))
+{
+    const s64 result = _spu->get_ch_value(ch);
+
+    if (result < 0)
+    {
+        _ret = &spu_rdch_ret;
+    }
+
+    // Return channel value in the third argument
+    _ret(*_spu, _spu->_ptr<u8>(0), static_cast<u32>(result));
+}
+
 void spu_recompiler::RDCH(spu_opcode_t op)
 {
     using namespace asmjit;

-    auto gate = [](SPUThread* _spu, u32 ch, v128* out)
-    {
-        u32 value;
-
-        if (_spu->get_ch_value(ch, value))
-        {
-            *out = v128::from32r(value);
-            _spu->pc += 4;
-        }
-    };
-
     auto read_channel = [&](X86Mem channel_ptr, bool sync = true)
     {
         Label wait = c->newLabel();
         Label again = c->newLabel();
+        Label ret = c->newLabel();
         c->mov(addr->r64(), channel_ptr);
         c->xor_(qw0->r32(), qw0->r32());
         c->align(kAlignCode, 16);
@@ -1376,12 +1473,11 @@ void spu_recompiler::RDCH(spu_opcode_t op)
         after.emplace_back([=, pos = m_pos]
         {
-            // Do not continue after waiting
             c->bind(wait);
             c->mov(SPU_OFF_32(pc), pos);
-            c->mov(*ls, op.ra);
-            c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
-            c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
+            c->mov(ls->r32(), op.ra);
+            c->lea(*qw0, x86::qword_ptr(ret));
+            c->jmp(imm_ptr(spu_rdch));
         });

         if (sync)
@@ -1396,10 +1492,11 @@ void spu_recompiler::RDCH(spu_opcode_t op)
             c->mov(channel_ptr, *qw0);
         }

-        const XmmLink& vr = XmmAlloc();
-        c->movd(vr, *addr);
-        c->pslldq(vr, 12);
-        c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
+        c->mov(qw0->r32(), *addr);
+        c->bind(ret);
+        c->movd(x86::xmm0, qw0->r32());
+        c->pslldq(x86::xmm0, 12);
+        c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
     };

     switch (op.ra)
@@ -1415,42 +1512,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
     case SPU_RdInMbox:
     {
         // TODO
-        Label wait = c->newLabel();
-        Label next = c->newLabel();
-        c->mov(SPU_OFF_32(pc), m_pos);
-        c->cmp(x86::byte_ptr(*cpu, offset32(&SPUThread::ch_in_mbox) + 1), 0);
-        c->jz(wait);
-
-        after.emplace_back([=]
-        {
-            // Do not continue after waiting
-            c->bind(wait);
-            c->mov(*ls, op.ra);
-            c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
-            c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
-        });
-
-        auto sub = [](SPUThread* _spu, v128* out, spu_function_t _ret)
-        {
-            // Workaround for gcc (TCO)
-            static thread_local u32 value;
-
-            if (!_spu->get_ch_value(SPU_RdInMbox, value))
-            {
-                // Workaround for MSVC (TCO)
-                fmt::raw_error("spu_recompiler::RDCH(): unexpected SPUThread::get_ch_value(SPU_RdInMbox) call");
-            }
-
-            *out = v128::from32r(value);
-            _ret(*_spu, _spu->_ptr<u8>(0), nullptr);
-        };
-
-        c->lea(*ls, SPU_OFF_128(gpr, op.rt));
-        c->lea(*qw0, x86::qword_ptr(next));
-        c->jmp(imm_ptr<void(*)(SPUThread*, v128*, spu_function_t)>(sub));
-        c->align(kAlignCode, 16);
-        c->bind(next);
-        return;
+        break;
     }
     case MFC_RdTagStat:
     {
@@ -1489,7 +1551,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
     {
         LOG_WARNING(SPU, "[0x%x] RDCH: RdDec", m_pos);

-        auto gate1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
+        auto sub1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
         {
             const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
@@ -1500,7 +1562,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
             _ret(*_spu, _spu->_ptr<u8>(0), nullptr);
         };

-        auto gate2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
+        auto sub2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
         {
             const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
@@ -1514,7 +1576,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
         c->mov(SPU_OFF_32(pc), m_pos);
         c->lea(*ls, SPU_OFF_128(gpr, op.rt));
         c->lea(*qw0, asmjit::x86::qword_ptr(next));
-        c->jmp(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr<ftype>(gate1) : asmjit::imm_ptr<ftype>(gate2));
+        c->jmp(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr<ftype>(sub1) : asmjit::imm_ptr<ftype>(sub2));
         c->align(asmjit::kAlignCode, 16);
         c->bind(next);
         return;
@@ -1532,22 +1594,23 @@ void spu_recompiler::RDCH(spu_opcode_t op)
         LOG_WARNING(SPU, "[0x%x] RDCH: RdEventStat", m_pos);
         get_events();
         Label wait = c->newLabel();
+        Label ret = c->newLabel();
         c->jz(wait);

         after.emplace_back([=, pos = m_pos]
         {
-            // Do not continue after waiting
             c->bind(wait);
             c->mov(SPU_OFF_32(pc), pos);
-            c->mov(*ls, op.ra);
-            c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
-            c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
+            c->mov(ls->r32(), op.ra);
+            c->lea(*qw0, x86::qword_ptr(ret));
+            c->jmp(imm_ptr(spu_rdch));
         });

-        const XmmLink& vr = XmmAlloc();
-        c->movd(vr, *addr);
-        c->pslldq(vr, 12);
-        c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
+        c->mov(qw0->r32(), *addr);
+        c->bind(ret);
+        c->movd(x86::xmm0, qw0->r32());
+        c->pslldq(x86::xmm0, 12);
+        c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
         return;
     }
     case SPU_RdMachStat:
@@ -1561,23 +1624,28 @@ void spu_recompiler::RDCH(spu_opcode_t op)
     }
     }

+    Label ret = c->newLabel();
     c->mov(SPU_OFF_32(pc), m_pos);
-    c->mov(*ls, op.ra);
-    c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
-    c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
-    m_pos = -1;
+    c->mov(ls->r32(), op.ra);
+    c->lea(*qw0, x86::qword_ptr(ret));
+    c->jmp(imm_ptr(spu_rdch));
+
+    c->bind(ret);
+    c->movd(x86::xmm0, qw0->r32());
+    c->pslldq(x86::xmm0, 12);
+    c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
+}
+
+static void spu_rchcnt(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32 res))
+{
+    // Put result into the third argument
+    const u32 res = _spu->get_ch_count(ch);
+    _ret(*_spu, _spu->_ptr<u8>(0), res);
 }

 void spu_recompiler::RCHCNT(spu_opcode_t op)
 {
     using namespace asmjit;

-    auto gate = [](SPUThread* _spu, u32 ch, v128* out)
-    {
-        *out = v128::from32r(_spu->get_ch_count(ch));
-        _spu->pc += 4;
-    };
-
     auto ch_cnt = [&](X86Mem channel_ptr, bool inv = false)
     {
         // Load channel count
@@ -1638,21 +1706,26 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
     {
         LOG_WARNING(SPU, "[0x%x] RCHCNT: RdEventStat", m_pos);
         get_events();
-        c->setnz(addr->r8());
-        c->movzx(*addr, addr->r8());
-        c->movd(x86::xmm0, *addr);
-        c->pslldq(x86::xmm0, 12);
-        c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
-        return;
+        c->setnz(qw0->r8());
+        c->movzx(qw0->r32(), qw0->r8());
+        break;
+    }
+    default:
+    {
+        Label ret = c->newLabel();
+        c->mov(SPU_OFF_32(pc), m_pos);
+        c->mov(*ls, op.ra);
+        c->lea(*qw0, x86::qword_ptr(ret));
+        c->jmp(imm_ptr(spu_rchcnt));
+        c->bind(ret);
+        break;
     }
     }

-    // Non-returnable fallback for unsupported events
-    c->mov(SPU_OFF_32(pc), m_pos);
-    c->mov(*ls, op.ra);
-    c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
-    c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
-    m_pos = -1;
+    // Use result from the third argument
+    c->movd(x86::xmm0, qw0->r32());
+    c->pslldq(x86::xmm0, 12);
+    c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
 }

 void spu_recompiler::SF(spu_opcode_t op)
@@ -2310,18 +2383,35 @@ void spu_recompiler::MTSPR(spu_opcode_t op)
     // Check SPUInterpreter for notes.
 }

+static void spu_wrch_ret(SPUThread& _spu, void*, u8*)
+{
+    // MSVC workaround (TCO)
+}
+
+static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret)
+{
+    if (!_spu->set_ch_value(ch, value))
+    {
+        _ret = &spu_wrch_ret;
+    }
+
+    _ret(*_spu, _spu->_ptr<u8>(0), nullptr);
+}
+
+static void spu_wrch_mfc(SPUThread* _spu, spu_function_t _ret)
+{
+    if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
+    {
+        _ret = &spu_wrch_ret;
+    }
+
+    _ret(*_spu, _spu->_ptr<u8>(0), nullptr);
+}
+
 void spu_recompiler::WRCH(spu_opcode_t op)
 {
     using namespace asmjit;

-    auto gate = [](SPUThread* _spu, u32 ch, u32 value)
-    {
-        if (_spu->set_ch_value(ch, value))
-        {
-            _spu->pc += 4;
-        }
-    };
-
     switch (op.ra)
     {
     case SPU_WrSRR0:
@@ -2332,42 +2422,14 @@ void spu_recompiler::WRCH(spu_opcode_t op)
     }
     case SPU_WrOutIntrMbox:
     {
-        auto sub = [](SPUThread* _spu, spu_function_t _ret, u32 value)
-        {
-            if (!_spu->set_ch_value(SPU_WrOutIntrMbox, value))
-            {
-                fmt::raw_error("spu_recompiler::WRCH(): unexpected SPUThread::set_ch_value(SPU_WrOutIntrMbox) call");
-            }
-
-            // Continue
-            _ret(*_spu, _spu->_ptr<u8>(0), nullptr);
-        };
-
-        Label ret = c->newLabel();
-        Label wait = c->newLabel();
-        c->mov(SPU_OFF_32(pc), m_pos);
-        c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
-        c->bt(SPU_OFF_64(ch_out_intr_mbox), spu_channel::off_count);
-        c->jc(wait);
-
-        after.emplace_back([=]
-        {
-            // Do not continue after waiting
-            c->bind(wait);
-            c->mov(*ls, op.ra);
-            c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
-        });
-
-        c->lea(*ls, x86::qword_ptr(ret));
-        c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t, u32)>(sub));
-        c->align(kAlignCode, 16);
-        c->bind(ret);
-        return;
+        // Can't seemingly be optimized
+        break;
     }
     case SPU_WrOutMbox:
     {
         Label wait = c->newLabel();
         Label again = c->newLabel();
+        Label ret = c->newLabel();
         c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
         c->mov(addr->r64(), SPU_OFF_64(ch_out_mbox));
         c->align(kAlignCode, 16);
@@ -2378,16 +2440,17 @@ void spu_recompiler::WRCH(spu_opcode_t op)
         after.emplace_back([=, pos = m_pos]
         {
-            // Do not continue after waiting
             c->bind(wait);
             c->mov(SPU_OFF_32(pc), pos);
-            c->mov(*ls, op.ra);
-            c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
+            c->mov(ls->r32(), op.ra);
+            c->lea(*qw1, x86::qword_ptr(ret));
+            c->jmp(imm_ptr(spu_wrch));
         });

         c->bts(*qw0, spu_channel::off_count);
         c->lock().cmpxchg(SPU_OFF_64(ch_out_mbox), *qw0);
         c->jnz(again);
+        c->bind(ret);
         return;
     }
     case MFC_WrTagMask:
@@ -2401,21 +2464,11 @@ void spu_recompiler::WRCH(spu_opcode_t op)
         after.emplace_back([=, pos = m_pos]
         {
-            auto sub = [](SPUThread* _spu, spu_function_t _ret, u32 value)
-            {
-                if (!_spu->set_ch_value(MFC_WrTagMask, value))
-                {
-                    fmt::raw_error("spu_recompiler::WRCH(): unexpected SPUThread::set_ch_value(MFC_WrTagMask) call");
-                }
-
-                // Continue
-                _ret(*_spu, _spu->_ptr<u8>(0), nullptr);
-            };
-
             c->bind(upd);
             c->mov(SPU_OFF_32(pc), pos);
-            c->lea(*ls, x86::qword_ptr(ret));
-            c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t, u32)>(sub));
+            c->lea(ls->r32(), MFC_WrTagMask);
+            c->lea(*qw1, x86::qword_ptr(ret));
+            c->jmp(imm_ptr(spu_wrch));
         });

         c->bind(ret);
@@ -2434,8 +2487,9 @@ void spu_recompiler::WRCH(spu_opcode_t op)
         {
             c->bind(fail);
             c->mov(SPU_OFF_32(pc), pos);
-            c->mov(*ls, op.ra);
-            c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
+            c->mov(ls->r32(), op.ra);
+            c->lea(*qw1, x86::qword_ptr(ret));
+            c->jmp(imm_ptr(spu_wrch));

             c->bind(zero);
             c->mov(SPU_OFF_32(ch_tag_upd), qw0->r32());
@@ -2496,22 +2550,12 @@ void spu_recompiler::WRCH(spu_opcode_t op)
     case MFC_Cmd:
     {
         // TODO
-        auto sub = [](SPUThread* _spu, spu_function_t _ret)
-        {
-            if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
-            {
-                throw cpu_flag::ret;
-            }
-
-            _ret(*_spu, _spu->_ptr<u8>(0), nullptr);
-        };
-
         Label ret = c->newLabel();
         c->mov(*addr, SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
         c->mov(SPU_OFF_8(ch_mfc_cmd, &spu_mfc_cmd::cmd), addr->r8());
         c->mov(SPU_OFF_32(pc), m_pos);
         c->lea(*ls, x86::qword_ptr(ret));
-        c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
+        c->jmp(imm_ptr(spu_wrch_mfc));
         c->align(kAlignCode, 16);
         c->bind(ret);
         return;
@@ -2555,6 +2599,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
     case SPU_WrEventMask:
     {
         Label fail = c->newLabel();
+        Label ret = c->newLabel();
         c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
         c->mov(*addr, ~SPU_EVENT_IMPLEMENTED);
         c->mov(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED);
@@ -2567,16 +2612,19 @@ void spu_recompiler::WRCH(spu_opcode_t op)
         {
             c->bind(fail);
             c->mov(SPU_OFF_32(pc), pos);
-            c->mov(*ls, op.ra);
-            c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
+            c->mov(ls->r32(), op.ra);
+            c->lea(*qw1, x86::qword_ptr(ret));
+            c->jmp(imm_ptr(spu_wrch));
         });

         c->mov(SPU_OFF_32(ch_event_mask), qw0->r32());
+        c->bind(ret);
         return;
     }
     case SPU_WrEventAck:
     {
         Label fail = c->newLabel();
+        Label ret = c->newLabel();
         c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
         c->test(qw0->r32(), ~SPU_EVENT_IMPLEMENTED);
         c->jnz(fail);
@@ -2585,8 +2633,9 @@ void spu_recompiler::WRCH(spu_opcode_t op)
         {
             c->bind(fail);
             c->mov(SPU_OFF_32(pc), pos);
-            c->mov(*ls, op.ra);
-            c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
+            c->mov(ls->r32(), op.ra);
+            c->lea(*qw1, x86::qword_ptr(ret));
+            c->jmp(imm_ptr(spu_wrch));
         });

         c->not_(qw0->r32());
@@ -2599,11 +2648,13 @@ void spu_recompiler::WRCH(spu_opcode_t op)
     }
     }

+    Label ret = c->newLabel();
     c->mov(SPU_OFF_32(pc), m_pos);
-    c->mov(*ls, op.ra);
+    c->mov(ls->r32(), op.ra);
     c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
-    c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
-    m_pos = -1;
+    c->lea(*qw1, x86::qword_ptr(ret));
+    c->jmp(imm_ptr(spu_wrch));
+    c->bind(ret);
 }

 void spu_recompiler::BIZ(spu_opcode_t op)

View file

@@ -59,6 +59,15 @@ private:
     std::vector<std::function<void()>> after;
     std::vector<std::function<void()>> consts;

+    // Function return label
+    asmjit::Label label_stop;
+
+    // Indirect branch dispatch table
+    asmjit::Label instr_table;
+
+    // All valid instruction labels
+    std::map<u32, asmjit::Label> instr_labels;
+
     // All emitted 128-bit consts
     std::map<std::pair<u64, u64>, asmjit::Label> xmm_consts;

View file

@@ -94,14 +94,14 @@ bool spu_interpreter::MFSPR(SPUThread& spu, spu_opcode_t op)
 bool spu_interpreter::RDCH(SPUThread& spu, spu_opcode_t op)
 {
-    u32 result;
-    if (!spu.get_ch_value(op.ra, result))
+    const s64 result = spu.get_ch_value(op.ra);
+    if (result < 0)
     {
         return false;
     }

-    spu.gpr[op.rt] = v128::from32r(result);
+    spu.gpr[op.rt] = v128::from32r(static_cast<u32>(result));
     return true;
 }

View file

@@ -119,6 +119,22 @@ void fmt_class_string<spu_decoder_type>::format(std::string& out, u64 arg)
     });
 }

+template <>
+void fmt_class_string<spu_block_size_type>::format(std::string& out, u64 arg)
+{
+    format_enum(out, arg, [](spu_block_size_type type)
+    {
+        switch (type)
+        {
+        case spu_block_size_type::safe: return "Safe";
+        case spu_block_size_type::mega: return "Mega";
+        case spu_block_size_type::giga: return "Giga";
+        }
+
+        return unknown;
+    });
+}
+
 namespace spu
 {
     namespace scheduler
@@ -1497,11 +1513,11 @@ u32 SPUThread::get_ch_count(u32 ch)
     fmt::throw_exception("Unknown/illegal channel (ch=%d [%s])" HERE, ch, ch < 128 ? spu_ch_name[ch] : "???");
 }

-bool SPUThread::get_ch_value(u32 ch, u32& out)
+s64 SPUThread::get_ch_value(u32 ch)
 {
     LOG_TRACE(SPU, "get_ch_value(ch=%d [%s])", ch, ch < 128 ? spu_ch_name[ch] : "???");

-    auto read_channel = [&](spu_channel& channel)
+    auto read_channel = [&](spu_channel& channel) -> s64
     {
         for (int i = 0; i < 10 && channel.get_count() == 0; i++)
         {
@@ -1515,25 +1531,26 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
             }
         }

+        u32 out;
+
         while (!channel.try_pop(out))
         {
             if (test(state, cpu_flag::stop))
             {
-                return false;
+                return -1;
             }

             thread_ctrl::wait();
         }

-        return true;
+        return out;
     };

     switch (ch)
     {
     case SPU_RdSRR0:
     {
-        out = srr0;
-        return true;
+        return srr0;
     }
     case SPU_RdInMbox:
     {
@@ -1551,6 +1568,8 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
             }
         }

+        u32 out;
+
         if (const uint old_count = ch_in_mbox.try_pop(out))
         {
             if (old_count == 4 /* SPU_IN_MBOX_THRESHOLD */) // TODO: check this
@@ -1558,12 +1577,12 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
                 int_ctrl[2].set(SPU_INT2_STAT_SPU_MAILBOX_THRESHOLD_INT);
             }

-            return true;
+            return out;
         }

         if (test(state & cpu_flag::stop))
         {
-            return false;
+            return -1;
         }

         thread_ctrl::wait();
@@ -1579,9 +1598,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
         if (ch_tag_stat.get_count())
         {
-            out = ch_tag_stat.get_value();
+            u32 out = ch_tag_stat.get_value();
             ch_tag_stat.set_value(0, false);
-            return true;
+            return out;
         }

         // Will stall infinitely
@@ -1590,8 +1609,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
     case MFC_RdTagMask:
     {
-        out = ch_tag_mask;
-        return true;
+        return ch_tag_mask;
     }

     case SPU_RdSigNotify1:
@@ -1608,9 +1626,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
     {
         if (ch_atomic_stat.get_count())
         {
-            out = ch_atomic_stat.get_value();
+            u32 out = ch_atomic_stat.get_value();
             ch_atomic_stat.set_value(0, false);
-            return true;
+            return out;
         }

         // Will stall infinitely
@@ -1621,9 +1639,9 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
     {
         if (ch_stall_stat.get_count())
         {
-            out = ch_stall_stat.get_value();
+            u32 out = ch_stall_stat.get_value();
             ch_stall_stat.set_value(0, false);
-            return true;
+            return out;
         }

         // Will stall infinitely
@@ -1632,19 +1650,18 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
     case SPU_RdDec:
     {
-        out = ch_dec_value - (u32)(get_timebased_time() - ch_dec_start_timestamp);
+        u32 out = ch_dec_value - (u32)(get_timebased_time() - ch_dec_start_timestamp);

         //Polling: We might as well hint to the scheduler to slot in another thread since this one is counting down
         if (g_cfg.core.spu_loop_detection && out > spu::scheduler::native_jiffy_duration_us)
             std::this_thread::yield();

-        return true;
+        return out;
     }

     case SPU_RdEventMask:
     {
-        out = ch_event_mask;
-        return true;
+        return ch_event_mask;
     }

     case SPU_RdEventStat:
@@ -1658,8 +1675,7 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
         if (res)
         {
-            out = res;
-            return true;
+            return res;
         }

         vm::waiter waiter;
@@ -1678,22 +1694,20 @@ bool SPUThread::get_ch_value(u32 ch, u32& out)
         {
             if (test(state & cpu_flag::stop))
             {
-                return false;
+                return -1;
             }

             thread_ctrl::wait_for(100);
         }

-        out = res;
-        return true;
+        return res;
     }

     case SPU_RdMachStat:
     {
         // HACK: "Not isolated" status
         // Return SPU Interrupt status in LSB
-        out = interrupts_enabled == true;
-        return true;
+        return interrupts_enabled == true;
     }
     }

View file

@@ -609,7 +609,7 @@ public:
     void set_events(u32 mask);
     void set_interrupt_status(bool enable);
     u32 get_ch_count(u32 ch);
-    bool get_ch_value(u32 ch, u32& out);
+    s64 get_ch_value(u32 ch);
     bool set_ch_value(u32 ch, u32 value);
     bool stop_and_signal(u32 code);
     void halt();

View file

@@ -30,6 +30,13 @@ enum class spu_decoder_type
     llvm,
 };

+enum class spu_block_size_type
+{
+    safe,
+    mega,
+    giga,
+};
+
 enum class lib_loading_type
 {
     automatic,
@@ -303,6 +310,7 @@ struct cfg_root : cfg::node
         cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; //Number of milliseconds to block a thread if a virtual 'core' isn't free
         cfg::_bool spu_loop_detection{this, "SPU loop detection", true}; //Try to detect wait loops and trigger thread yield
         cfg::_bool spu_shared_runtime{this, "SPU Shared Runtime", true}; // Share compiled SPU functions between all threads
+        cfg::_enum<spu_block_size_type> spu_block_size{this, "SPU Block Size"};
         cfg::_enum<lib_loading_type> lib_loading{this, "Lib Loader", lib_loading_type::liblv2only};
         cfg::_bool hook_functions{this, "Hook static functions"};