Migration to named_thread<>

Add atomic_t<>::try_dec instead of fetch_dec_sat
Add atomic_t<>::try_inc
GDBDebugServer is broken (needs rewrite)
Removed old_thread class (former named_thread)
Removed storing/rethrowing exceptions from thread
Emu.Stop doesn't inject an exception anymore
task_stack helper class removed
thread_base simplified (no shared_from_this)
thread_ctrl::spawn simplified (creates detached thread)
Implemented overridable thread detaching logic
Disabled cellAdec, cellDmux, cellFsAio
SPUThread renamed to spu_thread
RawSPUThread removed, spu_thread used instead
Disabled deriving from ppu_thread
Partial support for thread renaming
lv2_timer... simplified, screw it
idm/fxm: butchered support for on_stop/on_init
vm: improved allocation structure (added size)
This commit is contained in:
Nekotekina 2018-10-11 01:17:19 +03:00
parent 8ca6c9fff0
commit 1b37e775be
82 changed files with 1820 additions and 2023 deletions

View file

@@ -16,11 +16,11 @@
#include "SPUASMJITRecompiler.h"
#define SPU_OFF_128(x, ...) asmjit::x86::oword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
#define SPU_OFF_64(x, ...) asmjit::x86::qword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
#define SPU_OFF_32(x, ...) asmjit::x86::dword_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
#define SPU_OFF_16(x, ...) asmjit::x86::word_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
#define SPU_OFF_8(x, ...) asmjit::x86::byte_ptr(*cpu, offset32(&SPUThread::x, ##__VA_ARGS__))
#define SPU_OFF_128(x, ...) asmjit::x86::oword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
#define SPU_OFF_64(x, ...) asmjit::x86::qword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
#define SPU_OFF_32(x, ...) asmjit::x86::dword_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
#define SPU_OFF_16(x, ...) asmjit::x86::word_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
#define SPU_OFF_8(x, ...) asmjit::x86::byte_ptr(*cpu, offset32(&spu_thread::x, ##__VA_ARGS__))
extern const spu_decoder<spu_interpreter_fast> g_spu_interpreter_fast; // TODO: avoid
const spu_decoder<spu_recompiler> s_spu_decoder;
@@ -1177,12 +1177,12 @@ inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data)
return XmmConst(v128::fromV(data));
}
static void check_state_ret(SPUThread& _spu, void*, u8*)
static void check_state_ret(spu_thread& _spu, void*, u8*)
{
// MSVC workaround (TCO)
}
static void check_state(SPUThread* _spu, spu_function_t _ret)
static void check_state(spu_thread* _spu, spu_function_t _ret)
{
if (_spu->state && _spu->check_state())
{
@@ -1209,7 +1209,7 @@ void spu_recompiler::branch_fixed(u32 target)
return;
}
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&SPUThread::jit_dispatcher) + target * 2));
c->mov(x86::rax, x86::qword_ptr(*cpu, offset32(&spu_thread::jit_dispatcher) + target * 2));
c->mov(SPU_OFF_32(pc), target);
c->cmp(SPU_OFF_32(state), 0);
c->jnz(label_stop);
@@ -1251,7 +1251,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
}
else if (op.e)
{
auto _throw = [](SPUThread* _spu)
auto _throw = [](spu_thread* _spu)
{
fmt::throw_exception("SPU Interrupts not implemented (mask=0x%x)" HERE, +_spu->ch_event_mask);
};
@@ -1270,7 +1270,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->jmp(no_intr);
c->bind(fail);
c->mov(SPU_OFF_32(pc), *addr);
c->jmp(imm_ptr<void(*)(SPUThread*)>(_throw));
c->jmp(imm_ptr<void(*)(spu_thread*)>(_throw));
// Save addr in srr0 and disable interrupts
c->bind(intr);
@@ -1292,7 +1292,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
if (!jt && g_cfg.core.spu_block_size != spu_block_size_type::giga)
{
// Simply external call (return or indirect call)
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
c->mov(x86::r10, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher)));
}
else
{
@@ -1311,7 +1311,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
c->lea(x86::r10, x86::qword_ptr(instr_table));
c->cmp(qw1->r32(), end - start);
c->lea(x86::r10, x86::qword_ptr(x86::r10, *qw1, 1, 0));
c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&SPUThread::jit_dispatcher)));
c->lea(*qw1, x86::qword_ptr(*cpu, addr->r64(), 1, offset32(&spu_thread::jit_dispatcher)));
c->cmovae(x86::r10, *qw1);
c->mov(x86::r10, x86::qword_ptr(x86::r10));
}
@@ -1321,7 +1321,7 @@ void spu_recompiler::branch_indirect(spu_opcode_t op, bool jt, bool ret)
// Get stack pointer, try to use native return address (check SPU return address)
c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
c->and_(qw1->r32(), 0x3fff0);
c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)));
c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror)));
c->cmp(x86::dword_ptr(*qw1, 8), *addr);
c->cmove(x86::r10, x86::qword_ptr(*qw1));
}
@@ -1352,7 +1352,7 @@ void spu_recompiler::branch_set_link(u32 target)
// Get stack pointer, write native and SPU return addresses into the stack mirror
c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
c->and_(qw1->r32(), 0x3fff0);
c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)));
c->lea(*qw1, x86::qword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror)));
c->lea(x86::r10, x86::qword_ptr(ret));
c->mov(x86::qword_ptr(*qw1, 0), x86::r10);
c->mov(x86::qword_ptr(*qw1, 8), target);
@@ -1365,7 +1365,7 @@ void spu_recompiler::branch_set_link(u32 target)
c->mov(qw1->r32(), SPU_OFF_32(gpr, 1, &v128::_u32, 3));
c->and_(qw1->r32(), 0x3fff0);
c->pcmpeqd(x86::xmm0, x86::xmm0);
c->movdqa(x86::dqword_ptr(*cpu, *qw1, 0, ::offset32(&SPUThread::stack_mirror)), x86::xmm0);
c->movdqa(x86::dqword_ptr(*cpu, *qw1, 0, ::offset32(&spu_thread::stack_mirror)), x86::xmm0);
c->jmp(target);
});
}
@@ -1374,7 +1374,7 @@ void spu_recompiler::branch_set_link(u32 target)
void spu_recompiler::fall(spu_opcode_t op)
{
auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func, spu_function_t _ret)
auto gate = [](spu_thread* _spu, u32 opcode, spu_inter_func_t _func, spu_function_t _ret)
{
if (!_func(*_spu, {opcode}))
{
@@ -1391,7 +1391,7 @@ void spu_recompiler::fall(spu_opcode_t op)
c->mov(*ls, op.opcode);
c->mov(*qw0, asmjit::imm_ptr(asmjit::Internal::ptr_cast<void*>(g_spu_interpreter_fast.decode(op.opcode))));
c->lea(*qw1, asmjit::x86::qword_ptr(next));
c->jmp(asmjit::imm_ptr<void(*)(SPUThread*, u32, spu_inter_func_t, spu_function_t)>(gate));
c->jmp(asmjit::imm_ptr<void(*)(spu_thread*, u32, spu_inter_func_t, spu_function_t)>(gate));
c->align(asmjit::kAlignCode, 16);
c->bind(next);
}
@@ -1442,13 +1442,13 @@ void spu_recompiler::get_events()
if (utils::has_avx())
{
c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 0));
c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 0));
c->vxorps(x86::ymm1, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 0));
c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 32));
c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 32));
c->vxorps(x86::ymm2, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 32));
c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 64));
c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 64));
c->vxorps(x86::ymm3, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 64));
c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&SPUThread::rdata) + 96));
c->vmovups(x86::ymm0, x86::yword_ptr(*cpu, offset32(&spu_thread::rdata) + 96));
c->vxorps(x86::ymm4, x86::ymm0, x86::yword_ptr(*qw0, *addr, 0, 96));
c->vorps(x86::ymm0, x86::ymm1, x86::ymm2);
c->vorps(x86::ymm1, x86::ymm3, x86::ymm4);
@@ -1460,11 +1460,11 @@ void spu_recompiler::get_events()
else
{
c->movaps(x86::xmm0, x86::dqword_ptr(*qw0, *addr));
c->xorps(x86::xmm0, x86::dqword_ptr(*cpu, offset32(&SPUThread::rdata) + 0));
c->xorps(x86::xmm0, x86::dqword_ptr(*cpu, offset32(&spu_thread::rdata) + 0));
for (u32 i = 16; i < 128; i += 16)
{
c->movaps(x86::xmm1, x86::dqword_ptr(*qw0, *addr, 0, i));
c->xorps(x86::xmm1, x86::dqword_ptr(*cpu, offset32(&SPUThread::rdata) + i));
c->xorps(x86::xmm1, x86::dqword_ptr(*cpu, offset32(&spu_thread::rdata) + i));
c->orps(x86::xmm0, x86::xmm1);
}
@@ -1495,7 +1495,7 @@ void spu_recompiler::get_events()
// Check decrementer event (unlikely)
after.emplace_back([=]
{
auto sub = [](SPUThread* _spu, spu_function_t _ret)
auto sub = [](spu_thread* _spu, spu_function_t _ret)
{
if ((_spu->ch_dec_value - (get_timebased_time() - _spu->ch_dec_start_timestamp)) >> 31)
{
@@ -1508,7 +1508,7 @@ void spu_recompiler::get_events()
c->bind(tcheck);
c->lea(*ls, x86::qword_ptr(label2));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
c->jmp(imm_ptr<void(*)(spu_thread*, spu_function_t)>(sub));
});
// Check whether SPU_EVENT_TM is already set
@@ -1527,13 +1527,13 @@ void spu_recompiler::get_events()
after.emplace_back([=]
{
auto _throw = [](SPUThread* _spu)
auto _throw = [](spu_thread* _spu)
{
fmt::throw_exception("SPU Events not implemented (mask=0x%x)" HERE, +_spu->ch_event_mask);
};
c->bind(fail);
c->jmp(imm_ptr<void(*)(SPUThread*)>(_throw));
c->jmp(imm_ptr<void(*)(spu_thread*)>(_throw));
});
// Load active events into addr
@@ -1547,18 +1547,18 @@ void spu_recompiler::get_events()
void spu_recompiler::UNK(spu_opcode_t op)
{
auto gate = [](SPUThread* _spu, u32 op)
auto gate = [](spu_thread* _spu, u32 op)
{
fmt::throw_exception("Unknown/Illegal instruction (0x%08x)" HERE, op);
};
c->mov(SPU_OFF_32(pc), m_pos);
c->mov(*ls, op.opcode);
c->jmp(asmjit::imm_ptr<void(*)(SPUThread*, u32)>(gate));
c->jmp(asmjit::imm_ptr<void(*)(spu_thread*, u32)>(gate));
m_pos = -1;
}
void spu_stop(SPUThread* _spu, u32 code, spu_function_t _ret)
void spu_stop(spu_thread* _spu, u32 code, spu_function_t _ret)
{
if (!_spu->stop_and_signal(code))
{
@@ -1619,12 +1619,12 @@ void spu_recompiler::MFSPR(spu_opcode_t op)
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
}
static void spu_rdch_ret(SPUThread& spu, void*, u32)
static void spu_rdch_ret(spu_thread& spu, void*, u32)
{
// MSVC workaround (TCO)
}
static void spu_rdch(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32))
static void spu_rdch(spu_thread* _spu, u32 ch, void(*_ret)(spu_thread&, void*, u32))
{
const s64 result = _spu->get_ch_value(ch);
@@ -1733,7 +1733,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
{
LOG_WARNING(SPU, "[0x%x] RDCH: RdDec", m_pos);
auto sub1 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
auto sub1 = [](spu_thread* _spu, v128* _res, spu_function_t _ret)
{
const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
@@ -1744,7 +1744,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
auto sub2 = [](SPUThread* _spu, v128* _res, spu_function_t _ret)
auto sub2 = [](spu_thread* _spu, v128* _res, spu_function_t _ret)
{
const u32 out = _spu->ch_dec_value - static_cast<u32>(get_timebased_time() - _spu->ch_dec_start_timestamp);
@@ -1752,7 +1752,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
};
using ftype = void (*)(SPUThread*, v128*, spu_function_t);
using ftype = void (*)(spu_thread*, v128*, spu_function_t);
asmjit::Label next = c->newLabel();
c->mov(SPU_OFF_32(pc), m_pos);
@@ -1817,7 +1817,7 @@ void spu_recompiler::RDCH(spu_opcode_t op)
c->movdqa(SPU_OFF_128(gpr, op.rt), x86::xmm0);
}
static void spu_rchcnt(SPUThread* _spu, u32 ch, void(*_ret)(SPUThread&, void*, u32 res))
static void spu_rchcnt(spu_thread* _spu, u32 ch, void(*_ret)(spu_thread&, void*, u32 res))
{
// Put result into the third argument
const u32 res = _spu->get_ch_count(ch);
@@ -2565,12 +2565,12 @@ void spu_recompiler::MTSPR(spu_opcode_t op)
// Check SPUInterpreter for notes.
}
static void spu_wrch_ret(SPUThread& _spu, void*, u8*)
static void spu_wrch_ret(spu_thread& _spu, void*, u8*)
{
// MSVC workaround (TCO)
}
static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret)
static void spu_wrch(spu_thread* _spu, u32 ch, u32 value, spu_function_t _ret)
{
if (!_spu->set_ch_value(ch, value))
{
@@ -2580,7 +2580,7 @@ static void spu_wrch(SPUThread* _spu, u32 ch, u32 value, spu_function_t _ret)
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
}
static void spu_wrch_mfc(SPUThread* _spu, spu_function_t _ret)
static void spu_wrch_mfc(spu_thread* _spu, spu_function_t _ret)
{
if (!_spu->process_mfc_cmd(_spu->ch_mfc_cmd))
{
@@ -2744,7 +2744,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
}
case MFC_WrListStallAck:
{
auto sub = [](SPUThread* _spu, spu_function_t _ret)
auto sub = [](spu_thread* _spu, spu_function_t _ret)
{
_spu->do_mfc(true);
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
@@ -2756,14 +2756,14 @@ void spu_recompiler::WRCH(spu_opcode_t op)
c->btr(SPU_OFF_32(ch_stall_mask), qw0->r32());
c->jnc(ret);
c->lea(*ls, x86::qword_ptr(ret));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
c->jmp(imm_ptr<void(*)(spu_thread*, spu_function_t)>(sub));
c->align(kAlignCode, 16);
c->bind(ret);
return;
}
case SPU_WrDec:
{
auto sub = [](SPUThread* _spu, spu_function_t _ret)
auto sub = [](spu_thread* _spu, spu_function_t _ret)
{
_spu->ch_dec_start_timestamp = get_timebased_time();
_ret(*_spu, _spu->_ptr<u8>(0), nullptr);
@@ -2771,7 +2771,7 @@ void spu_recompiler::WRCH(spu_opcode_t op)
Label ret = c->newLabel();
c->lea(*ls, x86::qword_ptr(ret));
c->jmp(imm_ptr<void(*)(SPUThread*, spu_function_t)>(sub));
c->jmp(imm_ptr<void(*)(spu_thread*, spu_function_t)>(sub));
c->align(kAlignCode, 16);
c->bind(ret);
c->mov(qw0->r32(), SPU_OFF_32(gpr, op.rt, &v128::_u32, 3));
@@ -3113,7 +3113,7 @@ void spu_recompiler::CBX(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x03);
c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x03);
}
void spu_recompiler::CHX(spu_opcode_t op)
@@ -3126,7 +3126,7 @@ void spu_recompiler::CHX(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x0203);
c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x0203);
}
void spu_recompiler::CWX(spu_opcode_t op)
@@ -3139,7 +3139,7 @@ void spu_recompiler::CWX(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x00010203);
c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x00010203);
}
void spu_recompiler::CDX(spu_opcode_t op)
@@ -3153,7 +3153,7 @@ void spu_recompiler::CDX(spu_opcode_t op)
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), *qw0);
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
}
void spu_recompiler::ROTQBI(spu_opcode_t op)
@@ -3292,7 +3292,7 @@ void spu_recompiler::CBD(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x03);
c->mov(asmjit::x86::byte_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x03);
}
void spu_recompiler::CHD(spu_opcode_t op)
@@ -3316,7 +3316,7 @@ void spu_recompiler::CHD(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x0203);
c->mov(asmjit::x86::word_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x0203);
}
void spu_recompiler::CWD(spu_opcode_t op)
@@ -3340,7 +3340,7 @@ void spu_recompiler::CWD(spu_opcode_t op)
const XmmLink& vr = XmmAlloc();
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), 0x00010203);
c->mov(asmjit::x86::dword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), 0x00010203);
}
void spu_recompiler::CDD(spu_opcode_t op)
@@ -3365,7 +3365,7 @@ void spu_recompiler::CDD(spu_opcode_t op)
c->movdqa(vr, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
c->movdqa(SPU_OFF_128(gpr, op.rt), vr);
c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&SPUThread::gpr, op.rt)), *qw0);
c->mov(asmjit::x86::qword_ptr(*cpu, addr->r64(), 0, offset32(&spu_thread::gpr, op.rt)), *qw0);
}
void spu_recompiler::ROTQBII(spu_opcode_t op)