SPU LLVM: LS Memory Mirrors (Optimize loads/stores)

This commit is contained in:
Eladash 2020-07-15 21:57:39 +03:00 committed by Ivan
parent c1a80b8146
commit af1ceb1151
5 changed files with 104 additions and 31 deletions

View file

@ -272,8 +272,7 @@ DECLARE(spu_runtime::g_tail_escape) = build_function_asm<void(*)(spu_thread*, sp
// Tail call, GHC CC (second arg)
c.mov(x86::r13, args[0]);
c.mov(x86::ebp, x86::dword_ptr(args[0], ::offset32(&spu_thread::offset)));
c.add(x86::rbp, x86::qword_ptr(args[0], ::offset32(&spu_thread::memory_base_addr)));
c.mov(x86::rbp, x86::qword_ptr(args[0], ::offset32(&spu_thread::ls)));
c.mov(x86::r12, args[2]);
c.xor_(x86::ebx, x86::ebx);
c.jmp(args[1]);
@ -1138,7 +1137,7 @@ void spu_recompiler_base::branch(spu_thread& spu, void*, u8* rip)
}
// Find function
const auto func = spu.jit->get_runtime().find(static_cast<u32*>(vm::base(spu.offset)), spu.pc);
const auto func = spu.jit->get_runtime().find(static_cast<u32*>(spu._ptr<void>(0)), spu.pc);
if (!func)
{
@ -7902,13 +7901,51 @@ public:
// STQX: emits a 16-byte store of register rt at an address built from
// element 3 of registers ra and rb.
//
// Fast path: when one of the two operands is an LLVM constant whose element 3
// (reduced modulo SPU_LS_SIZE) is a multiple of 0x10, the constant is added
// after masking the other operand instead of masking the full sum.
// General path: the two elements are added and the sum is masked with 0x3fff0.
void STQX(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = get_vr(op.rb);

	// Try each operand in turn as the constant half of the address.
	for (auto pair : std::initializer_list<std::pair<value_t<u32[4]>, value_t<u32[4]>>>{{a, b}, {b, a}})
	{
		if (auto cv = llvm::dyn_cast<llvm::Constant>(pair.first.value))
		{
			v128 data = get_const_vector(cv, m_pos, 10000);
			data._u32[3] %= SPU_LS_SIZE;

			// With a 16-byte aligned constant the low four bits of the sum come
			// from the other operand alone, so masking that operand is enough
			// to keep the final address aligned.
			if (data._u32[3] % 0x10 == 0)
			{
				// The sum is deliberately not masked again, so it may exceed 0x3fff0.
				// NOTE(review): presumably make_store_ls tolerates this through the
				// LS mirror mapping this change introduces - confirm.
				value_t<u64> addr = eval(splat<u64>(data._u32[3]) + zext<u64>(extract(pair.second, 3) & 0x3fff0));
				make_store_ls(addr, get_vr<u8[16]>(op.rt));
				return;
			}
		}
	}

	// Neither operand is a usable constant: mask the full sum.
	value_t<u64> addr = eval(zext<u64>((extract(a, 3) + extract(b, 3)) & 0x3fff0));
	make_store_ls(addr, get_vr<u8[16]>(op.rt));
}
// LQX: loads a value via make_load_ls into register rt from an address built
// from element 3 of registers ra and rb.
//
// Fast path: when one of the two operands is an LLVM constant whose element 3
// (reduced modulo SPU_LS_SIZE) is a multiple of 0x10, the constant is added
// after masking the other operand instead of masking the full sum.
// General path: the two elements are added and the sum is masked with 0x3fff0.
void LQX(spu_opcode_t op)
{
	const auto a = get_vr(op.ra);
	const auto b = get_vr(op.rb);

	// Try each operand in turn as the constant half of the address.
	for (auto pair : std::initializer_list<std::pair<value_t<u32[4]>, value_t<u32[4]>>>{{a, b}, {b, a}})
	{
		if (auto cv = llvm::dyn_cast<llvm::Constant>(pair.first.value))
		{
			v128 data = get_const_vector(cv, m_pos, 10000);
			data._u32[3] %= SPU_LS_SIZE;

			// With a 16-byte aligned constant the low four bits of the sum come
			// from the other operand alone, so masking that operand is enough
			// to keep the final address aligned.
			if (data._u32[3] % 0x10 == 0)
			{
				// The sum is deliberately not masked again, so it may exceed 0x3fff0.
				// NOTE(review): presumably make_load_ls tolerates this through the
				// LS mirror mapping this change introduces - confirm.
				value_t<u64> addr = eval(splat<u64>(data._u32[3]) + zext<u64>(extract(pair.second, 3) & 0x3fff0));
				set_vr(op.rt, make_load_ls(addr));
				return;
			}
		}
	}

	// Neither operand is a usable constant: mask the full sum.
	value_t<u64> addr = eval(zext<u64>((extract(a, 3) + extract(b, 3)) & 0x3fff0));
	set_vr(op.rt, make_load_ls(addr));
}
@ -7928,7 +7965,7 @@ public:
{
value_t<u64> addr;
addr.value = m_ir->CreateZExt(m_interp_magn ? m_interp_pc : get_pc(m_pos), get_type<u64>());
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & 0x3fff0);
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & (m_interp_magn ? 0x3fff0 : ~0xf));
make_store_ls(addr, get_vr<u8[16]>(op.rt));
}
@ -7936,7 +7973,7 @@ public:
{
value_t<u64> addr;
addr.value = m_ir->CreateZExt(m_interp_magn ? m_interp_pc : get_pc(m_pos), get_type<u64>());
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & 0x3fff0);
addr = eval(((get_imm<u64>(op.i16, false) << 2) + addr) & (m_interp_magn ? 0x3fff0 : ~0xf));
set_vr(op.rt, make_load_ls(addr));
}
@ -7953,13 +7990,13 @@ public:
}
}
value_t<u64> addr = eval(zext<u64>((extract(get_vr(op.ra), 3) + (get_imm<u32>(op.si10) << 4)) & 0x3fff0));
value_t<u64> addr = eval(zext<u64>(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm<u64>(op.si10) << 4));
make_store_ls(addr, get_vr<u8[16]>(op.rt));
}
// LQD: loads a value via make_load_ls into register rt from the address
// (element 3 of ra, masked with 0x3fff0) + (si10 immediate << 4).
void LQD(spu_opcode_t op)
{
	// The base is masked before the shifted immediate is added, and the add is
	// done in 64 bits, so the result is not wrapped back into the 0x3fff0 range.
	// NOTE(review): presumably make_load_ls accepts such addresses through the
	// LS mirror mapping this change introduces - confirm.
	value_t<u64> addr = eval(zext<u64>(extract(get_vr(op.ra), 3) & 0x3fff0) + (get_imm<u64>(op.si10) << 4));
	set_vr(op.rt, make_load_ls(addr));
}