diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 7995a857bf..d49d240597 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -23,6 +23,9 @@ #include #include +// Verify AVX availability for TSX transactions +static const bool s_tsx_avx = utils::has_avx(); + #ifdef _MSC_VER bool operator ==(const u128& lhs, const u128& rhs) { @@ -183,6 +186,36 @@ const auto spu_putllc_tx = build_function_asm([]( using namespace asmjit; Label fall = c.newLabel(); + Label _ret = c.newLabel(); + + if (utils::has_avx() && !s_tsx_avx) + { + c.vzeroupper(); + } + + // Create stack frame if necessary (Windows ABI has only 6 volatile vector registers) +#ifdef _WIN32 + if (!s_tsx_avx) + { + c.sub(x86::rsp, 40); + c.movups(x86::oword_ptr(x86::rsp, 0), x86::xmm6); + c.movups(x86::oword_ptr(x86::rsp, 16), x86::xmm7); + } +#endif // Prepare registers c.mov(x86::rax, imm_ptr(&vm::g_reservations)); @@ -287,17 +411,48 @@ const auto spu_getll_tx = build_function_asm([]( // Begin transaction Label begin = build_transaction_enter(c, fall); c.mov(x86::rax, x86::qword_ptr(x86::r10)); - c.vmovaps(x86::ymm0, x86::yword_ptr(x86::r11, 0)); - c.vmovaps(x86::ymm1, x86::yword_ptr(x86::r11, 32)); - c.vmovaps(x86::ymm2, x86::yword_ptr(x86::r11, 64)); - c.vmovaps(x86::ymm3, x86::yword_ptr(x86::r11, 96)); + + if (s_tsx_avx) + { + c.vmovaps(x86::ymm0, x86::yword_ptr(x86::r11, 0)); + c.vmovaps(x86::ymm1, x86::yword_ptr(x86::r11, 32)); + c.vmovaps(x86::ymm2, x86::yword_ptr(x86::r11, 64)); + c.vmovaps(x86::ymm3, x86::yword_ptr(x86::r11, 96)); + } + else + { + c.movaps(x86::xmm0, x86::oword_ptr(x86::r11, 0)); + c.movaps(x86::xmm1, x86::oword_ptr(x86::r11, 16)); + c.movaps(x86::xmm2, x86::oword_ptr(x86::r11, 32)); + c.movaps(x86::xmm3, x86::oword_ptr(x86::r11, 48)); + c.movaps(x86::xmm4, x86::oword_ptr(x86::r11, 64)); + c.movaps(x86::xmm5, x86::oword_ptr(x86::r11, 80)); + c.movaps(x86::xmm6, x86::oword_ptr(x86::r11, 96)); + c.movaps(x86::xmm7, x86::oword_ptr(x86::r11, 112)); + } + c.xend(); - c.vmovups(x86::yword_ptr(args[1], 0), x86::ymm0); - c.vmovups(x86::yword_ptr(args[1], 32), x86::ymm1); - c.vmovups(x86::yword_ptr(args[1], 64), x86::ymm2); - c.vmovups(x86::yword_ptr(args[1], 96), x86::ymm3); - c.vzeroupper(); - c.ret(); + + if (s_tsx_avx) + { + c.vmovups(x86::yword_ptr(args[1], 0), x86::ymm0); + c.vmovups(x86::yword_ptr(args[1], 32), x86::ymm1); + c.vmovups(x86::yword_ptr(args[1], 64), x86::ymm2); + c.vmovups(x86::yword_ptr(args[1], 96), x86::ymm3); + } + else + { + c.movaps(x86::oword_ptr(args[1], 0), x86::xmm0); + c.movaps(x86::oword_ptr(args[1], 16), x86::xmm1); + c.movaps(x86::oword_ptr(args[1], 32), x86::xmm2); + c.movaps(x86::oword_ptr(args[1], 48), x86::xmm3); + c.movaps(x86::oword_ptr(args[1], 64), x86::xmm4); + c.movaps(x86::oword_ptr(args[1], 80), x86::xmm5); + c.movaps(x86::oword_ptr(args[1], 96), x86::xmm6); + c.movaps(x86::oword_ptr(args[1], 112), x86::xmm7); + } + + c.jmp(_ret); // Touch memory after transaction failure c.bind(fall); @@ -311,6 +466,24 @@ const auto spu_getll_tx = build_function_asm([]( c.sub(args[0], 1); c.jnz(begin); c.mov(x86::eax, 1); + c.jmp(_ret); + + c.bind(_ret); + +#ifdef _WIN32 + if (!s_tsx_avx) + { + c.movups(x86::xmm6, x86::oword_ptr(x86::rsp, 0)); + c.movups(x86::xmm7, x86::oword_ptr(x86::rsp, 16)); + c.add(x86::rsp, 40); + } +#endif + + if (s_tsx_avx) + { + c.vzeroupper(); + } + c.ret(); }); @@ -319,6 +492,22 @@ const auto spu_putlluc_tx = build_function_asm