SPU ASMJIT: internal jumptable

Allow indirect calls within current function using a jumptable
This restores some functionality removed in SPU ASMJIT 2.0
Change SPUThread::get_ch_value prototype
This commit is contained in:
Nekotekina 2018-04-30 19:39:06 +03:00
parent df453d6d4f
commit 767dfa271e
7 changed files with 302 additions and 229 deletions

View file

@ -152,7 +152,7 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
vec[i] = vec_vars[i];
}
Label label_stop = c->newLabel();
label_stop = c->newLabel();
Label label_diff = c->newLabel();
Label label_code = c->newLabel();
std::vector<u32> words;
@ -163,6 +163,15 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
const u32 start = m_pos;
const u32 end = m_pos + (func.size() - 1) * 4;
// Create instruction labels (TODO: some of them are unnecessary)
for (u32 i = 1; i < func.size(); i++)
{
if (func[i])
{
instr_labels[i * 4 - 4 + m_pos] = c->newLabel();
}
}
// Set PC and check status
c->mov(SPU_OFF_32(pc), m_pos);
c->cmp(SPU_OFF_32(state), 0);
@ -713,6 +722,14 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
// Update position
m_pos = pos;
// Bind instruction label if necessary
const auto found = instr_labels.find(pos);
if (found != instr_labels.end())
{
c->bind(found->second);
}
// Execute recompiler function
(this->*s_spu_decoder.decode(op))({op});
@ -750,6 +767,27 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
work();
}
// Build instruction dispatch table
if (instr_table.isValid())
{
c->align(kAlignData, 8);
c->bind(instr_table);
for (u32 addr = start; addr < end; addr += 4)
{
const auto found = instr_labels.find(addr);
if (found != instr_labels.end())
{
c->embedLabel(found->second);
}
else
{
c->embedLabel(label_stop);
}
}
}
c->align(kAlignData, words_align);
c->bind(label_code);
for (u32 d : words)
@ -760,6 +798,9 @@ spu_function_t spu_recompiler::compile(const std::vector<u32>& func)
work();
}
label_stop.reset();
instr_table.reset();
instr_labels.clear();
xmm_consts.clear();
// Compile and get function address
@ -1066,6 +1107,28 @@ void spu_recompiler::branch_fixed(u32 target)
{
using namespace asmjit;
// Check local branch
const auto local = instr_labels.find(target);
if (local != instr_labels.end() && local->second.isValid())
{
c->cmp(SPU_OFF_32(state), 0);
c->jz(local->second);
c->mov(SPU_OFF_32(pc), target);
c->ret();
return;
}
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
{
// Don't generate patch points in this mode
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
c->mov(SPU_OFF_32(pc), target);
c->xor_(qw0->r32(), qw0->r32());
c->jmp(x86::rax);
return;
}
// Set patch address as a third argument and fallback to it
Label patch_point = c->newLabel();
c->lea(*qw0, x86::qword_ptr(patch_point));
@ -1109,9 +1172,23 @@ void spu_recompiler::branch_indirect(spu_opcode_t op)
{
using namespace asmjit;
// Load indirect jump address
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
if (!instr_table.isValid())
{
// Request instruction table
instr_table = c->newLabel();
}
const u32 start = instr_labels.begin()->first;
const u32 end = instr_labels.rbegin()->first + 4;
// Load indirect jump address, choose between local and external
c->lea(x86::r10, x86::qword_ptr(instr_table));
c->lea(*qw1, x86::qword_ptr(*addr, 0 - start));
c->xor_(qw0->r32(), qw0->r32());
c->cmp(qw1->r32(), end - start);
c->cmovae(qw1->r32(), qw0->r32());
c->cmovb(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
c->cmovae(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
if (op.d)
{
@ -1119,22 +1196,34 @@ void spu_recompiler::branch_indirect(spu_opcode_t op)
}
else if (op.e)
{
c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
c->mov(x86::r9d, SPU_OFF_32(ch_event_stat));
c->and_(x86::r9d, SPU_OFF_32(ch_event_mask));
c->and_(x86::r9d, SPU_EVENT_INTR_TEST);
c->cmp(x86::r9d, 0);
Label no_intr = c->newLabel();
Label intr = c->newLabel();
Label fail = c->newLabel();
Label noInterrupt = c->newLabel();
c->je(noInterrupt);
c->lock().bts(SPU_OFF_8(interrupts_enabled), 0);
c->mov(qw1->r32(), SPU_OFF_32(ch_event_mask));
c->test(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED);
c->jnz(fail);
c->and_(qw1->r32(), SPU_OFF_32(ch_event_stat));
c->test(qw1->r32(), SPU_EVENT_INTR_IMPLEMENTED);
c->jnz(intr);
c->jmp(no_intr);
c->bind(fail);
c->mov(SPU_OFF_32(pc), *addr);
c->mov(addr->r64(), reinterpret_cast<u64>(vm::base(0xffdead00)));
c->mov(asmjit::x86::dword_ptr(addr->r64()), "INTR"_u32);
c->bind(intr);
c->lock().btr(SPU_OFF_8(interrupts_enabled), 0);
c->mov(SPU_OFF_32(srr0), *addr);
branch_fixed(0);
c->mov(*addr, qw0->r32());
c->mov(x86::r10, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher)));
c->align(kAlignCode, 16);
c->bind(noInterrupt);
c->bind(no_intr);
}
c->mov(SPU_OFF_32(pc), *addr);
c->cmp(SPU_OFF_32(state), 0);
c->jnz(label_stop);
c->jmp(x86::r10);
}
@ -1348,25 +1437,33 @@ void spu_recompiler::MFSPR(spu_opcode_t op)
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
}
static void spu_rdch_ret(SPUThread& spu, void*, u32)
{
// MSVC workaround (TCO)
}
static void spu_rdch(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32))
{
const s64 result = _spu->get_ch_value(ch);
if (result < 0)
{
_ret = &spu_rdch_ret;
}
// Return channel value in the third argument
_ret(*_spu, _spu->_ptr<u8>(0), static_cast<u32>(result));
}
void spu_recompiler::RDCH(spu_opcode_t op)
{
using namespace asmjit;
auto gate = [](SPUThread* _spu, u32 ch, v128* out)
{
u32 value;
if (_spu->get_ch_value(ch, value))
{
*out = v128::from32r(value);
_spu->pc += 4;
}
};
auto read_channel = [&](X86Mem channel_ptr, bool sync = true)
{
Label wait = c->newLabel();
Label again = c->newLabel();
Label ret = c->newLabel();
c->mov(addr->r64(), channel_ptr);
c->xor_(qw0->r32(), qw0->r32());
c->align(kAlignCode, 16);
@ -1376,12 +1473,11 @@ void spu_recompiler::RDCH(spu_opcode_t op)
after.emplace_back([=, pos = m_pos]
{
// Do not continue after waiting
c->bind(wait);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw0, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_rdch));
});
if (sync)
@ -1396,10 +1492,11 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->mov(channel_ptr, *qw0);
}
const XmmLink& vr = XmmAlloc();
c->movd(vr, *addr);
c->pslldq(vr, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(qw0->r32(), *addr);
c->bind(ret);
c->movd(x86::xmm0, qw0->r32());
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
};
switch (op.ra)
@ -1415,42 +1512,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
case SPU_RdInMbox:
{
// TODO
Label wait = c->newLabel();
Label next = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->cmp(x86::byte_ptr(*cpu, offset32(&SPUThread::ch_in_mbox) + 1), 0);
c->jz(wait);
after.emplace_back([=]
{
// Do not continue after waiting
c->bind(wait);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
});
auto sub = [](SPUThread* _spu, v128* out, spu_function_t _ret)
{
// Workaround for gcc (TCO)
static thread_local u32 value;
if (!_spu->get_ch_value(SPU_RdInMbox, value))
{
// Workaround for MSVC (TCO)
fmt::raw_error("spu_recompiler::RDCH(): unexpected SPUThread::get_ch_value(SPU_RdInMbox) call");
}
*out = v128::from32r(value);
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
c->lea(*ls, SPU_OFF_128(gpr, op.rt));
c->lea(*qw0, x86::qword_ptr(next));
c->jmp(imm_ptr<void(*)(SPUThread*, v128*, spu_function_t)>(sub));
c->align(kAlignCode, 16);
c->bind(next);
return;
break;
}
case MFC_RdTagStat:
{
@ -1489,7 +1551,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
{
LOG_WARNING(SPU, "[0x%x] RDCH: RdDec", m_pos);
auto gate1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
auto sub1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
{
const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
@ -1500,7 +1562,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
auto gate2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
auto sub2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
{
const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
@ -1514,7 +1576,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->mov(SPU_OFF_32(pc), m_pos);
c->lea(*ls, SPU_OFF_128(gpr, op.rt));
c->lea(*qw0, asmjit::x86::qword_ptr(next));
c->jmp(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr<ftype>(gate1) : asmjit::imm_ptr<ftype>(gate2));
c->jmp(g_cfg.core.spu_loop_detection ? asmjit::imm_ptr<ftype>(sub1) : asmjit::imm_ptr<ftype>(sub2));
c->align(asmjit::kAlignCode, 16);
c->bind(next);
return;
@ -1532,22 +1594,23 @@ void spu_recompiler::RDCH(spu_opcode_t op)
LOG_WARNING(SPU, "[0x%x] RDCH: RdEventStat", m_pos);
get_events();
Label wait = c->newLabel();
Label ret = c->newLabel();
c->jz(wait);
after.emplace_back([=, pos = m_pos]
{
// Do not continue after waiting
c->bind(wait);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw0, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_rdch));
});
const XmmLink& vr = XmmAlloc();
c->movd(vr, *addr);
c->pslldq(vr, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(qw0->r32(), *addr);
c->bind(ret);
c->movd(x86::xmm0, qw0->r32());
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
return;
}
case SPU_RdMachStat:
@ -1561,23 +1624,28 @@ void spu_recompiler::RDCH(spu_opcode_t op)
}
}
Label ret = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
m_pos = -1;
c->mov(ls->r32(), op.ra);
c->lea(*qw0, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_rdch));
c->bind(ret);
c->movd(x86::xmm0, qw0->r32());
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
}
static void spu_rchcnt(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32 res))
{
// Put result into the third argument
const u32 res = _spu->get_ch_count(ch);
_ret(*_spu, _spu->_ptr<u8>(0), res);
}
void spu_recompiler::RCHCNT(spu_opcode_t op)
{
using namespace asmjit;
auto gate = [](SPUThread* _spu, u32 ch, v128* out)
{
*out = v128::from32r(_spu->get_ch_count(ch));
_spu->pc += 4;
};
auto ch_cnt = [&](X86Mem channel_ptr, bool inv = false)
{
// Load channel count
@ -1638,21 +1706,26 @@ void spu_recompiler::RCHCNT(spu_opcode_t op)
{
LOG_WARNING(SPU, "[0x%x] RCHCNT: RdEventStat", m_pos);
get_events();
c->setnz(addr->r8());
c->movzx(*addr, addr->r8());
c->movd(x86::xmm0, *addr);
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
return;
c->setnz(qw0->r8());
c->movzx(qw0->r32(), qw0->r8());
break;
}
default:
{
Label ret = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.ra);
c->lea(*qw0, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_rchcnt));
c->bind(ret);
break;
}
}
// Non-returnable fallback for unsupported events
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.ra);
c->lea(*qw0, SPU_OFF_128(gpr, op.rt));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, v128*)>(gate));
m_pos = -1;
// Use result from the third argument
c->movd(x86::xmm0, qw0->r32());
c->pslldq(x86::xmm0, 12);
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
}
void spu_recompiler::SF(spu_opcode_t op)
@ -2310,18 +2383,35 @@ void spu_recompiler::MTSPR(spu_opcode_t op)
// Check SPUInterpreter for notes.
}
static void spu_wrch_ret(SPUThread& _spu, void*, u8*)
{
// MSVC workaround (TCO)
}
static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret)
{
if (!_spu->set_ch_value(ch, value))
{
_ret = &spu_wrch_ret;
}
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
}
static void spu_wrch_mfc(SPUThread* _spu, spu_function_t _ret)
{
if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
{
_ret = &spu_wrch_ret;
}
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
}
void spu_recompiler::WRCH(spu_opcode_t op)
{
using namespace asmjit;
auto gate = [](SPUThread* _spu, u32 ch, u32 value)
{
if (_spu->set_ch_value(ch, value))
{
_spu->pc += 4;
}
};
switch (op.ra)
{
case SPU_WrSRR0:
@ -2332,42 +2422,14 @@ void spu_recompiler::WRCH(spu_opcode_t op)
}
case SPU_WrOutIntrMbox:
{
auto sub = [](SPUThread* _spu, spu_function_t _ret, u32 value)
{
if (!_spu->set_ch_value(SPU_WrOutIntrMbox, value))
{
fmt::raw_error("spu_recompiler::WRCH(): unexpected SPUThread::set_ch_value(SPU_WrOutIntrMbox) call");
}
// Continue
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
Label ret = c->newLabel();
Label wait = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->bt(SPU_OFF_64(ch_out_intr_mbox), spu_channel::off_count);
c->jc(wait);
after.emplace_back([=]
{
// Do not continue after waiting
c->bind(wait);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
});
c->lea(*ls, x86::qword_ptr(ret));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t, u32)>(sub));
c->align(kAlignCode, 16);
c->bind(ret);
return;
// Can't seemingly be optimized
break;
}
case SPU_WrOutMbox:
{
Label wait = c->newLabel();
Label again = c->newLabel();
Label ret = c->newLabel();
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(addr->r64(), SPU_OFF_64(ch_out_mbox));
c->align(kAlignCode, 16);
@ -2378,16 +2440,17 @@ void spu_recompiler::WRCH(spu_opcode_t op)
after.emplace_back([=, pos = m_pos]
{
// Do not continue after waiting
c->bind(wait);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
});
c->bts(*qw0, spu_channel::off_count);
c->lock().cmpxchg(SPU_OFF_64(ch_out_mbox), *qw0);
c->jnz(again);
c->bind(ret);
return;
}
case MFC_WrTagMask:
@ -2401,21 +2464,11 @@ void spu_recompiler::WRCH(spu_opcode_t op)
after.emplace_back([=, pos = m_pos]
{
auto sub = [](SPUThread* _spu, spu_function_t _ret, u32 value)
{
if (!_spu->set_ch_value(MFC_WrTagMask, value))
{
fmt::raw_error("spu_recompiler::WRCH(): unexpected SPUThread::set_ch_value(MFC_WrTagMask) call");
}
// Continue
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
c->bind(upd);
c->mov(SPU_OFF_32(pc), pos);
c->lea(*ls, x86::qword_ptr(ret));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t, u32)>(sub));
c->lea(ls->r32(), MFC_WrTagMask);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
});
c->bind(ret);
@ -2434,8 +2487,9 @@ void spu_recompiler::WRCH(spu_opcode_t op)
{
c->bind(fail);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
c->bind(zero);
c->mov(SPU_OFF_32(ch_tag_upd), qw0->r32());
@ -2496,22 +2550,12 @@ void spu_recompiler::WRCH(spu_opcode_t op)
case MFC_Cmd:
{
// TODO
auto sub = [](SPUThread* _spu, spu_function_t _ret)
{
if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
{
throw cpu_flag::ret;
}
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
Label ret = c->newLabel();
c->mov(*addr, SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(SPU_OFF_8(ch_mfc_cmd, &spu_mfc_cmd::cmd), addr->r8());
c->mov(SPU_OFF_32(pc), m_pos);
c->lea(*ls, x86::qword_ptr(ret));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
c->jmp(imm_ptr(spu_wrch_mfc));
c->align(kAlignCode, 16);
c->bind(ret);
return;
@ -2555,6 +2599,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
case SPU_WrEventMask:
{
Label fail = c->newLabel();
Label ret = c->newLabel();
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->mov(*addr, ~SPU_EVENT_IMPLEMENTED);
c->mov(qw1->r32(), ~SPU_EVENT_INTR_IMPLEMENTED);
@ -2567,16 +2612,19 @@ void spu_recompiler::WRCH(spu_opcode_t op)
{
c->bind(fail);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
});
c->mov(SPU_OFF_32(ch_event_mask), qw0->r32());
c->bind(ret);
return;
}
case SPU_WrEventAck:
{
Label fail = c->newLabel();
Label ret = c->newLabel();
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->test(qw0->r32(), ~SPU_EVENT_IMPLEMENTED);
c->jnz(fail);
@ -2585,8 +2633,9 @@ void spu_recompiler::WRCH(spu_opcode_t op)
{
c->bind(fail);
c->mov(SPU_OFF_32(pc), pos);
c->mov(*ls, op.ra);
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
c->mov(ls->r32(), op.ra);
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
});
c->not_(qw0->r32());
@ -2599,11 +2648,13 @@ void spu_recompiler::WRCH(spu_opcode_t op)
}
}
Label ret = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.ra);
c->mov(ls->r32(), op.ra);
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
c->jmp(imm_ptr<void(*)(SPUThread*, u32, u32)>(gate));
m_pos = -1;
c->lea(*qw1, x86::qword_ptr(ret));
c->jmp(imm_ptr(spu_wrch));
c->bind(ret);
}
void spu_recompiler::BIZ(spu_opcode_t op)