mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-09 00:11:24 +12:00
LLVM: enable some JIT events (Intel, Perf)
Made some related adjustments. Currently incomplete.
This commit is contained in:
parent
510041a873
commit
d836033212
16 changed files with 233 additions and 162 deletions
|
@ -405,7 +405,7 @@ std::array<u32, 2> op_branch_targets(u32 pc, spu_opcode_t op)
|
|||
return res;
|
||||
}
|
||||
|
||||
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>([](asmjit::X86Assembler& c, auto& args)
|
||||
const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _old, const void* _new)>("spu_putllc_tx", [](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
|
@ -420,12 +420,8 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
//}
|
||||
|
||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
||||
c.push(x86::rbp);
|
||||
c.push(x86::r13);
|
||||
c.push(x86::r12);
|
||||
c.push(x86::rbx);
|
||||
c.sub(x86::rsp, 168);
|
||||
#ifdef _WIN32
|
||||
c.sub(x86::rsp, 168);
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
||||
|
@ -447,16 +443,14 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
#endif
|
||||
|
||||
// Prepare registers
|
||||
build_swap_rdx_with(c, args, x86::r12);
|
||||
c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
||||
build_swap_rdx_with(c, args, x86::r10);
|
||||
c.mov(args[1], x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.lea(args[1], x86::qword_ptr(args[1], args[0]));
|
||||
c.prefetchw(x86::byte_ptr(args[1], 0));
|
||||
c.prefetchw(x86::byte_ptr(args[1], 64));
|
||||
c.and_(args[0].r32(), 0xff80);
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbx));
|
||||
c.mov(x86::r13, args[1]);
|
||||
c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
|
||||
// Prepare data
|
||||
if (s_tsx_avx)
|
||||
|
@ -504,8 +498,6 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast<u64>(&g_rtm_tx_limit2)));
|
||||
c.jae(fall);
|
||||
});
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
||||
|
||||
// Check pause flag
|
||||
c.bt(x86::dword_ptr(args[2], ::offset32(&spu_thread::state) - ::offset32(&spu_thread::rdata)), static_cast<u32>(cpu_flag::pause));
|
||||
|
@ -514,10 +506,10 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(x86::rbp, 32));
|
||||
c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(x86::rbp, 64));
|
||||
c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(x86::rbp, 96));
|
||||
c.vxorps(x86::ymm0, x86::ymm0, x86::yword_ptr(args[1], 0));
|
||||
c.vxorps(x86::ymm1, x86::ymm1, x86::yword_ptr(args[1], 32));
|
||||
c.vxorps(x86::ymm2, x86::ymm2, x86::yword_ptr(args[1], 64));
|
||||
c.vxorps(x86::ymm3, x86::ymm3, x86::yword_ptr(args[1], 96));
|
||||
c.vorps(x86::ymm0, x86::ymm0, x86::ymm1);
|
||||
c.vorps(x86::ymm1, x86::ymm2, x86::ymm3);
|
||||
c.vorps(x86::ymm0, x86::ymm1, x86::ymm0);
|
||||
|
@ -525,14 +517,14 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
}
|
||||
else
|
||||
{
|
||||
c.xorps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
|
||||
c.xorps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
|
||||
c.xorps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
|
||||
c.xorps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
|
||||
c.xorps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
|
||||
c.xorps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
|
||||
c.xorps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
|
||||
c.xorps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
|
||||
c.xorps(x86::xmm0, x86::oword_ptr(args[1], 0));
|
||||
c.xorps(x86::xmm1, x86::oword_ptr(args[1], 16));
|
||||
c.xorps(x86::xmm2, x86::oword_ptr(args[1], 32));
|
||||
c.xorps(x86::xmm3, x86::oword_ptr(args[1], 48));
|
||||
c.xorps(x86::xmm4, x86::oword_ptr(args[1], 64));
|
||||
c.xorps(x86::xmm5, x86::oword_ptr(args[1], 80));
|
||||
c.xorps(x86::xmm6, x86::oword_ptr(args[1], 96));
|
||||
c.xorps(x86::xmm7, x86::oword_ptr(args[1], 112));
|
||||
c.orps(x86::xmm0, x86::xmm1);
|
||||
c.orps(x86::xmm2, x86::xmm3);
|
||||
c.orps(x86::xmm4, x86::xmm5);
|
||||
|
@ -547,25 +539,25 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::yword_ptr(x86::rbp, 0), x86::ymm4);
|
||||
c.vmovaps(x86::yword_ptr(x86::rbp, 32), x86::ymm5);
|
||||
c.vmovaps(x86::yword_ptr(x86::rbp, 64), x86::ymm6);
|
||||
c.vmovaps(x86::yword_ptr(x86::rbp, 96), x86::ymm7);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 0), x86::ymm4);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 32), x86::ymm5);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 64), x86::ymm6);
|
||||
c.vmovaps(x86::yword_ptr(args[1], 96), x86::ymm7);
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 0), x86::xmm8);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 16), x86::xmm9);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 32), x86::xmm10);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 48), x86::xmm11);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 64), x86::xmm12);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 80), x86::xmm13);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 96), x86::xmm14);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 112), x86::xmm15);
|
||||
c.movaps(x86::oword_ptr(args[1], 0), x86::xmm8);
|
||||
c.movaps(x86::oword_ptr(args[1], 16), x86::xmm9);
|
||||
c.movaps(x86::oword_ptr(args[1], 32), x86::xmm10);
|
||||
c.movaps(x86::oword_ptr(args[1], 48), x86::xmm11);
|
||||
c.movaps(x86::oword_ptr(args[1], 64), x86::xmm12);
|
||||
c.movaps(x86::oword_ptr(args[1], 80), x86::xmm13);
|
||||
c.movaps(x86::oword_ptr(args[1], 96), x86::xmm14);
|
||||
c.movaps(x86::oword_ptr(args[1], 112), x86::xmm15);
|
||||
}
|
||||
|
||||
c.xend();
|
||||
c.lock().add(x86::qword_ptr(x86::rbx), 64);
|
||||
c.lock().add(x86::qword_ptr(x86::r11), 64);
|
||||
c.add(x86::qword_ptr(args[2], ::offset32(&spu_thread::stx) - ::offset32(&spu_thread::rdata)), 1);
|
||||
build_get_tsc(c);
|
||||
c.sub(x86::rax, stamp0);
|
||||
|
@ -577,21 +569,21 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
// Load previous data to store back to rdata
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::ymm0, x86::yword_ptr(x86::rbp, 0));
|
||||
c.vmovaps(x86::ymm1, x86::yword_ptr(x86::rbp, 32));
|
||||
c.vmovaps(x86::ymm2, x86::yword_ptr(x86::rbp, 64));
|
||||
c.vmovaps(x86::ymm3, x86::yword_ptr(x86::rbp, 96));
|
||||
c.vmovaps(x86::ymm0, x86::yword_ptr(args[1], 0));
|
||||
c.vmovaps(x86::ymm1, x86::yword_ptr(args[1], 32));
|
||||
c.vmovaps(x86::ymm2, x86::yword_ptr(args[1], 64));
|
||||
c.vmovaps(x86::ymm3, x86::yword_ptr(args[1], 96));
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movaps(x86::xmm0, x86::oword_ptr(x86::rbp, 0));
|
||||
c.movaps(x86::xmm1, x86::oword_ptr(x86::rbp, 16));
|
||||
c.movaps(x86::xmm2, x86::oword_ptr(x86::rbp, 32));
|
||||
c.movaps(x86::xmm3, x86::oword_ptr(x86::rbp, 48));
|
||||
c.movaps(x86::xmm4, x86::oword_ptr(x86::rbp, 64));
|
||||
c.movaps(x86::xmm5, x86::oword_ptr(x86::rbp, 80));
|
||||
c.movaps(x86::xmm6, x86::oword_ptr(x86::rbp, 96));
|
||||
c.movaps(x86::xmm7, x86::oword_ptr(x86::rbp, 112));
|
||||
c.movaps(x86::xmm0, x86::oword_ptr(args[1], 0));
|
||||
c.movaps(x86::xmm1, x86::oword_ptr(args[1], 16));
|
||||
c.movaps(x86::xmm2, x86::oword_ptr(args[1], 32));
|
||||
c.movaps(x86::xmm3, x86::oword_ptr(args[1], 48));
|
||||
c.movaps(x86::xmm4, x86::oword_ptr(args[1], 64));
|
||||
c.movaps(x86::xmm5, x86::oword_ptr(args[1], 80));
|
||||
c.movaps(x86::xmm6, x86::oword_ptr(args[1], 96));
|
||||
c.movaps(x86::xmm7, x86::oword_ptr(args[1], 112));
|
||||
}
|
||||
|
||||
c.xend();
|
||||
|
@ -603,7 +595,7 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
c.jmp(_ret);
|
||||
|
||||
c.bind(fail2);
|
||||
c.lock().sub(x86::qword_ptr(x86::rbx), 64);
|
||||
c.lock().sub(x86::qword_ptr(x86::r11), 64);
|
||||
c.bind(load);
|
||||
|
||||
// Store previous data back to rdata
|
||||
|
@ -652,6 +644,7 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
c.movups(x86::xmm14, x86::oword_ptr(x86::rsp, 128));
|
||||
c.movups(x86::xmm15, x86::oword_ptr(x86::rsp, 144));
|
||||
}
|
||||
c.add(x86::rsp, 168);
|
||||
#endif
|
||||
|
||||
if (s_tsx_avx)
|
||||
|
@ -659,15 +652,18 @@ const auto spu_putllc_tx = built_function<u64(*)(u32 raddr, u64 rtime, void* _ol
|
|||
c.vzeroupper();
|
||||
}
|
||||
|
||||
c.add(x86::rsp, 168);
|
||||
c.pop(x86::rbx);
|
||||
c.pop(x86::r12);
|
||||
c.pop(x86::r13);
|
||||
c.pop(x86::rbp);
|
||||
#ifdef __linux__
|
||||
// Hack for perf profiling (TODO)
|
||||
Label ret2 = c.newLabel();
|
||||
c.lea(x86::rdx, x86::qword_ptr(ret2));
|
||||
c.push(x86::rdx);
|
||||
c.push(x86::rdx);
|
||||
c.bind(ret2);
|
||||
#endif
|
||||
c.ret();
|
||||
});
|
||||
|
||||
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>([](asmjit::X86Assembler& c, auto& args)
|
||||
const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata, u64* _stx, u64* _ftx)>("spu_putlluc_tx", [](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
|
@ -680,30 +676,20 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
|||
//}
|
||||
|
||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
||||
c.push(x86::rbp);
|
||||
c.push(x86::r13);
|
||||
c.push(x86::r12);
|
||||
c.push(x86::rbx);
|
||||
c.sub(x86::rsp, 40);
|
||||
#ifdef _WIN32
|
||||
c.sub(x86::rsp, 40);
|
||||
if (!s_tsx_avx)
|
||||
{
|
||||
c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6);
|
||||
c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Prepare registers
|
||||
build_swap_rdx_with(c, args, x86::r12);
|
||||
c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
||||
c.and_(args[0].r32(), 0xff80);
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbx));
|
||||
c.mov(x86::r13, args[1]);
|
||||
build_swap_rdx_with(c, args, x86::r10);
|
||||
c.mov(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
|
||||
c.prefetchw(x86::byte_ptr(x86::r11, 0));
|
||||
c.prefetchw(x86::byte_ptr(x86::r11, 64));
|
||||
|
||||
// Prepare data
|
||||
if (s_tsx_avx)
|
||||
|
@ -725,6 +711,10 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
|||
c.movaps(x86::xmm7, x86::oword_ptr(args[1], 112));
|
||||
}
|
||||
|
||||
c.and_(args[0].r32(), 0xff80);
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(args[1], x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
|
||||
// Alloc args[0] to stamp0
|
||||
const auto stamp0 = args[0];
|
||||
build_get_tsc(c, stamp0);
|
||||
|
@ -739,35 +729,29 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
|||
c.jae(fall);
|
||||
});
|
||||
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 0));
|
||||
c.prefetchw(x86::byte_ptr(x86::rbp, 64));
|
||||
|
||||
// // Check pause flag
|
||||
// c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
|
||||
// c.jc(fall);
|
||||
c.xbegin(tx1);
|
||||
|
||||
if (s_tsx_avx)
|
||||
{
|
||||
c.vmovaps(x86::yword_ptr(x86::rbp, 0), x86::ymm0);
|
||||
c.vmovaps(x86::yword_ptr(x86::rbp, 32), x86::ymm1);
|
||||
c.vmovaps(x86::yword_ptr(x86::rbp, 64), x86::ymm2);
|
||||
c.vmovaps(x86::yword_ptr(x86::rbp, 96), x86::ymm3);
|
||||
c.vmovaps(x86::yword_ptr(x86::r11, 0), x86::ymm0);
|
||||
c.vmovaps(x86::yword_ptr(x86::r11, 32), x86::ymm1);
|
||||
c.vmovaps(x86::yword_ptr(x86::r11, 64), x86::ymm2);
|
||||
c.vmovaps(x86::yword_ptr(x86::r11, 96), x86::ymm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 0), x86::xmm0);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 16), x86::xmm1);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 32), x86::xmm2);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 48), x86::xmm3);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 64), x86::xmm4);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 80), x86::xmm5);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 96), x86::xmm6);
|
||||
c.movaps(x86::oword_ptr(x86::rbp, 112), x86::xmm7);
|
||||
c.movaps(x86::oword_ptr(x86::r11, 0), x86::xmm0);
|
||||
c.movaps(x86::oword_ptr(x86::r11, 16), x86::xmm1);
|
||||
c.movaps(x86::oword_ptr(x86::r11, 32), x86::xmm2);
|
||||
c.movaps(x86::oword_ptr(x86::r11, 48), x86::xmm3);
|
||||
c.movaps(x86::oword_ptr(x86::r11, 64), x86::xmm4);
|
||||
c.movaps(x86::oword_ptr(x86::r11, 80), x86::xmm5);
|
||||
c.movaps(x86::oword_ptr(x86::r11, 96), x86::xmm6);
|
||||
c.movaps(x86::oword_ptr(x86::r11, 112), x86::xmm7);
|
||||
}
|
||||
|
||||
c.xend();
|
||||
c.lock().add(x86::qword_ptr(x86::rbx), 32);
|
||||
c.lock().add(x86::qword_ptr(args[1]), 32);
|
||||
// stx++
|
||||
c.add(x86::qword_ptr(args[2]), 1);
|
||||
build_get_tsc(c);
|
||||
|
@ -786,6 +770,7 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
|||
c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0));
|
||||
c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16));
|
||||
}
|
||||
c.add(x86::rsp, 40);
|
||||
#endif
|
||||
|
||||
if (s_tsx_avx)
|
||||
|
@ -793,15 +778,18 @@ const auto spu_putlluc_tx = built_function<u64(*)(u32 raddr, const void* rdata,
|
|||
c.vzeroupper();
|
||||
}
|
||||
|
||||
c.add(x86::rsp, 40);
|
||||
c.pop(x86::rbx);
|
||||
c.pop(x86::r12);
|
||||
c.pop(x86::r13);
|
||||
c.pop(x86::rbp);
|
||||
#ifdef __linux__
|
||||
// Hack for perf profiling (TODO)
|
||||
Label ret2 = c.newLabel();
|
||||
c.lea(x86::rdx, x86::qword_ptr(ret2));
|
||||
c.push(x86::rdx);
|
||||
c.push(x86::rdx);
|
||||
c.bind(ret2);
|
||||
#endif
|
||||
c.ret();
|
||||
});
|
||||
|
||||
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>([](asmjit::X86Assembler& c, auto& args)
|
||||
const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_thread* _cpu, u64 rtime)>("spu_getllar_tx", [](asmjit::X86Assembler& c, auto& args)
|
||||
{
|
||||
using namespace asmjit;
|
||||
|
||||
|
@ -815,8 +803,6 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
|
|||
|
||||
// Create stack frame if necessary (Windows ABI has only 6 volatile vector registers)
|
||||
c.push(x86::rbp);
|
||||
c.push(x86::r13);
|
||||
c.push(x86::r12);
|
||||
c.push(x86::rbx);
|
||||
c.sub(x86::rsp, 40);
|
||||
#ifdef _WIN32
|
||||
|
@ -828,13 +814,12 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
|
|||
#endif
|
||||
|
||||
// Prepare registers
|
||||
build_swap_rdx_with(c, args, x86::r12);
|
||||
build_swap_rdx_with(c, args, x86::r10);
|
||||
c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast<u64>(&vm::g_sudo_addr)));
|
||||
c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
|
||||
c.and_(args[0].r32(), 0xff80);
|
||||
c.shr(args[0].r32(), 1);
|
||||
c.lea(x86::rbx, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
c.mov(x86::r13, args[1]);
|
||||
c.lea(x86::r11, x86::qword_ptr(reinterpret_cast<u64>(+vm::g_reservations), args[0]));
|
||||
|
||||
// Alloc args[0] to stamp0
|
||||
const auto stamp0 = args[0];
|
||||
|
@ -853,7 +838,7 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
|
|||
// Check pause flag
|
||||
c.bt(x86::dword_ptr(args[2], ::offset32(&cpu_thread::state)), static_cast<u32>(cpu_flag::pause));
|
||||
c.jc(fall);
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rbx));
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::r11));
|
||||
c.and_(x86::rax, -128);
|
||||
c.cmp(x86::rax, args[3]);
|
||||
c.jne(fall);
|
||||
|
@ -926,9 +911,16 @@ const auto spu_getllar_tx = built_function<u64(*)(u32 raddr, void* rdata, cpu_th
|
|||
|
||||
c.add(x86::rsp, 40);
|
||||
c.pop(x86::rbx);
|
||||
c.pop(x86::r12);
|
||||
c.pop(x86::r13);
|
||||
c.pop(x86::rbp);
|
||||
|
||||
#ifdef __linux__
|
||||
// Hack for perf profiling (TODO)
|
||||
Label ret2 = c.newLabel();
|
||||
c.lea(x86::rdx, x86::qword_ptr(ret2));
|
||||
c.push(x86::rdx);
|
||||
c.push(x86::rdx);
|
||||
c.bind(ret2);
|
||||
#endif
|
||||
c.ret();
|
||||
});
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue