mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-06 23:11:25 +12:00
2624 lines
70 KiB
C++
2624 lines
70 KiB
C++
#include "stdafx.h"
|
|
#include "Utilities/Log.h"
|
|
#include "Utilities/File.h"
|
|
#include "Emu/System.h"
|
|
|
|
#include "SPUDisAsm.h"
|
|
#include "SPUThread.h"
|
|
#include "SPUInterpreter.h"
|
|
#include "SPUASMJITRecompiler.h"
|
|
|
|
#define ASMJIT_STATIC
|
|
#define ASMJIT_DEBUG
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma comment(lib, "asmjit.lib")
|
|
#endif
|
|
|
|
#include "asmjit.h"
|
|
|
|
// Byte offset of the member expression `x` inside `type`, measured from a null base pointer
#define OFFSET_OF(type, x) static_cast<s32>(reinterpret_cast<uintptr_t>(&(static_cast<type*>(nullptr)->x)))

// Memory operands of the named width that address the SPUThread member `x` relative to the `cpu` variable
#define SPU_OFF_128(x) asmjit::host::oword_ptr(*cpu, OFFSET_OF(SPUThread, x))
#define SPU_OFF_64(x) asmjit::host::qword_ptr(*cpu, OFFSET_OF(SPUThread, x))
#define SPU_OFF_32(x) asmjit::host::dword_ptr(*cpu, OFFSET_OF(SPUThread, x))
#define SPU_OFF_16(x) asmjit::host::word_ptr(*cpu, OFFSET_OF(SPUThread, x))
#define SPU_OFF_8(x) asmjit::host::byte_ptr(*cpu, OFFSET_OF(SPUThread, x))
|
|
|
|
// Creates the JIT runtime, logs the creation and starts a fresh SPUJIT.log with a title header.
spu_recompiler::spu_recompiler()
	: m_jit(std::make_shared<asmjit::JitRuntime>())
{
	// Query host CPU information (the result is not used further in this constructor)
	asmjit::X86CpuInfo inf;
	asmjit::X86CpuUtil::detect(&inf);

	LOG_SUCCESS(SPU, "SPU Recompiler (ASMJIT) created...");

	// Rewrite (truncate) the log file and put the current title information on top
	const std::string header = fmt::format("SPU JIT initialization...\n\nTitle: %s\nTitle ID: %s\n\n", Emu.GetTitle().c_str(), Emu.GetTitleID().c_str());

	fs::file("SPUJIT.log", fom::rewrite) << header;
}
|
|
|
|
// Translates the SPU function `f` to host code with AsmJit and stores the entry point in f.compiled.
// Does nothing if f.compiled is already set. Throws EXCEPTION for an invalid function range,
// block entry or jump table entry. The disassembly and the AsmJit log are appended to SPUJIT.log.
void spu_recompiler::compile(spu_function_t& f)
{
	std::lock_guard<std::mutex> guard(m_mutex);

	if (f.compiled)
	{
		// Already compiled, nothing to do
		return;
	}

	// The function must be a non-empty, 4-byte aligned range that ends within 0x40000
	if (f.addr >= 0x40000 || f.addr % 4 || f.size == 0 || f.size > 0x40000 - f.addr || f.size % 4)
	{
		throw EXCEPTION("Invalid SPU function (addr=0x%05x, size=0x%x)", f.addr, f.size);
	}

	using namespace asmjit;

	// Disassembler output is used for code comments and for the log text
	SPUDisAsm disasm(CPUDisAsm_InterpreterMode);
	disasm.offset = reinterpret_cast<u8*>(f.data.data()) - f.addr;

	StringLogger jit_logger;
	jit_logger.setOption(kLoggerOptionBinaryForm, true);

	std::string text = fmt::format("========== SPU FUNCTION 0x%05x - 0x%05x ==========\n\n", f.addr, f.addr + f.size);

	this->m_func = &f;

	X86Compiler compiler(m_jit.get());
	this->c = &compiler;
	compiler.setLogger(&jit_logger);

	compiler.addFunc(kFuncConvHost, FuncBuilder2<u32, void*, void*>());

	// Argument 0: pointer used as the base of all SPU_OFF_* operands
	X86GpVar cpu_var(compiler, kVarTypeIntPtr, "cpu");
	compiler.setArg(0, cpu_var);
	compiler.alloc(cpu_var, asmjit::host::rbp); // ASMJIT bug workaround
	this->cpu = &cpu_var;

	// Argument 1: pointer used as the base of local storage accesses
	X86GpVar ls_var(compiler, kVarTypeIntPtr, "ls");
	compiler.setArg(1, ls_var);
	compiler.alloc(ls_var, asmjit::host::rbx); // ASMJIT bug workaround
	this->ls = &ls_var;

	// General purpose temporaries shared by the instruction emitters
	X86GpVar addr_var(compiler, kVarTypeUInt32, "addr");
	this->addr = &addr_var;
	X86GpVar qw0_var(compiler, kVarTypeUInt64, "qw0");
	this->qw0 = &qw0_var;
	X86GpVar qw1_var(compiler, kVarTypeUInt64, "qw1");
	this->qw1 = &qw1_var;
	X86GpVar qw2_var(compiler, kVarTypeUInt64, "qw2");
	this->qw2 = &qw2_var;

	// Pool of xmm variables handed out by XmmAlloc()
	std::array<X86XmmVar, 6> vec_vars;

	for (u32 n = 0; n < vec_vars.size(); n++)
	{
		vec_vars[n] = X86XmmVar{ compiler, kX86VarTypeXmm, fmt::format("vec%d", n).c_str() };
		vec.at(n) = vec_vars.data() + n;
	}

	// One (initially unset) label slot per 4-byte position
	std::vector<Label> pos_labels(0x10000);
	this->labels = pos_labels.data();

	// Create labels for block entries
	for (const u32 entry : f.blocks)
	{
		if (entry < f.addr || entry >= f.addr + f.size || entry % 4)
		{
			throw EXCEPTION("Invalid function block entry (0x%05x)", entry);
		}

		pos_labels[entry / 4] = compiler.newLabel();
	}

	// Create the label for the address right after the function
	pos_labels[(f.addr + f.size) / 4 % 0x10000] = compiler.newLabel();

	// Create the label for the jump table resolver
	Label jt_label = compiler.newLabel();
	this->jt = &jt_label;

	// Jump table entries are only validated here; code for them is emitted after the function body
	for (const u32 target : f.jtable)
	{
		if (target < f.addr || target >= f.addr + f.size || target % 4)
		{
			throw EXCEPTION("Invalid jump table entry (0x%05x)", target);
		}
	}

	// Create the label for the function return
	Label end_label = compiler.newLabel();
	this->end = &end_label;

	// Emit code instruction by instruction
	m_pos = f.addr;

	for (const u32 op : f.data)
	{
		Label& here = pos_labels[m_pos / 4];

		// Bind the label of this position if one was created
		if (here.isInitialized())
		{
			compiler.bind(here);

			if (f.blocks.find(m_pos) != f.blocks.end())
			{
				compiler.addComment("Block:");
			}
		}

		// Disassemble for the code comment and the log
		disasm.dump_pc = m_pos;
		disasm.do_disasm(op);
		compiler.addComment(disasm.last_opcode.c_str());
		text += disasm.last_opcode.c_str();
		text += '\n';

		// Dispatch to the emitter of this opcode
		(this->*spu_recompiler::opcodes[op])({ op });

		// Return xmm variables taken by the emitter (their slot is null) to the pool
		for (u32 n = 0; n < vec_vars.size(); n++)
		{
			if (!vec[n])
			{
				compiler.unuse(vec_vars[n]);
				vec[n] = vec_vars.data() + n;
			}
		}

		// Advance to the next instruction
		m_pos += 4;
	}

	text += '\n';

	// Default function end: continue at the next address
	compiler.bind(pos_labels[m_pos / 4 % 0x10000]);
	compiler.addComment("Fallthrough:");
	compiler.mov(addr_var, spu_branch_target(m_pos));
	compiler.jmp(end_label);

	// Jump table resolver (compares addr_var against every known target)
	compiler.bind(jt_label);

	if (f.jtable.size())
	{
		compiler.addComment("Jump table resolver:");
	}

	for (const u32 target : f.jtable)
	{
		const bool usable = (target % 4) == 0 && target < 0x40000 && pos_labels[target / 4].isInitialized();

		if (!usable)
		{
			LOG_ERROR(SPU, "Unable to add jump table entry (0x%05x)", target);
			continue;
		}

		// It could be binary search or something
		compiler.cmp(addr_var, target);
		compiler.je(pos_labels[target / 4]);
	}

	// Function end (returns addr_var)
	compiler.bind(end_label);
	compiler.unuse(cpu_var);
	compiler.unuse(ls_var);
	compiler.ret(addr_var);

	// Finalization
	compiler.endFunc();

	// Build the code and store the function address
	f.compiled = asmjit_cast<spu_jit_func_t>(compiler.make());

	// Add the AsmJit log
	text += jit_logger.getString();
	text += "\n\n\n";

	// Append to the log file
	fs::file("SPUJIT.log", fom::write | fom::append) << text;
}
|
|
|
|
// Returns a link to the first non-null entry of the xmm variable pool.
// Throws EXCEPTION if every entry is null.
spu_recompiler::XmmLink spu_recompiler::XmmAlloc() // get empty xmm register
{
	for (std::size_t slot = 0; slot < vec.size(); slot++)
	{
		if (vec[slot])
		{
			return{ vec[slot] };
		}
	}

	throw EXCEPTION("Out of Xmm Vars");
}
|
|
|
|
// Allocates an xmm variable and emits a load of SPU register `reg` into it.
// `type` selects the move instruction (movdqa / movaps / movapd); any other value throws EXCEPTION.
spu_recompiler::XmmLink spu_recompiler::XmmGet(s8 reg, XmmType type) // get xmm register with specific SPU reg
{
	XmmLink loaded = XmmAlloc();

	if (type == XmmType::Int)
	{
		c->movdqa(loaded, SPU_OFF_128(gpr[reg]));
	}
	else if (type == XmmType::Float)
	{
		c->movaps(loaded, SPU_OFF_128(gpr[reg]));
	}
	else if (type == XmmType::Double)
	{
		c->movapd(loaded, SPU_OFF_128(gpr[reg]));
	}
	else
	{
		throw EXCEPTION("Invalid XmmType");
	}

	return loaded;
}
|
|
|
|
// Registers `data` as a local-scope 128-bit constant and returns the memory operand referring to it.
inline asmjit::X86Mem spu_recompiler::XmmConst(v128 data)
{
	const auto packed = asmjit::Vec128::fromUq(data._u64[0], data._u64[1]);

	return c->newXmmConst(asmjit::kConstScopeLocal, packed);
}
|
|
|
|
// Overload for a float vector: converts through v128::fromF and forwards to the v128 overload.
inline asmjit::X86Mem spu_recompiler::XmmConst(__m128 data)
{
	const v128 as_v128 = v128::fromF(data);

	return XmmConst(as_v128);
}
|
|
|
|
// Overload for an integer vector: converts through v128::fromV and forwards to the v128 overload.
inline asmjit::X86Mem spu_recompiler::XmmConst(__m128i data)
{
	const v128 as_v128 = v128::fromV(data);

	return XmmConst(as_v128);
}
|
|
|
|
// Emits a call that executes `op` through the fast interpreter table entry for its opcode.
// The generated code stores m_pos to pc first, and leaves through the `end` label
// whenever the gate returns a non-zero value.
void spu_recompiler::InterpreterCall(spu_opcode_t op)
{
	// Host-side trampoline. Return value:
	//   0                - continue with the next instruction
	//   0x2000000 | pc   - status check succeeded or the interpreter function changed pc
	//   0x1000000 | pc   - an exception was caught and saved in pending_exception
	auto gate = [](SPUThread* _spu, u32 opcode, spu_inter_func_t _func) noexcept -> u32
	{
		try
		{
			// TODO: check correctness

			const u32 pc_before = _spu->pc;

			if (_spu->m_state && _spu->check_status())
			{
				return 0x2000000 | _spu->pc;
			}

			_func(*_spu, { opcode });

			// pc is advanced in both cases; only the return value differs
			const bool pc_changed = pc_before != _spu->pc;

			_spu->pc += 4;

			if (pc_changed)
			{
				return 0x2000000 | _spu->pc;
			}

			return 0;
		}
		catch (...)
		{
			_spu->pending_exception = std::current_exception();
			return 0x1000000 | _spu->pc;
		}
	};

	c->mov(SPU_OFF_32(pc), m_pos);

	const auto gate_ptr = asmjit::imm_ptr(asmjit_cast<void*, u32(SPUThread*, u32, spu_inter_func_t)>(gate));

	asmjit::X86CallNode* call = c->call(gate_ptr, asmjit::kFuncConvHost, asmjit::FuncBuilder3<u32, void*, u32, void*>());
	call->setArg(0, *cpu);
	call->setArg(1, asmjit::imm_u(op.opcode));
	call->setArg(2, asmjit::imm_ptr(asmjit_cast<void*>(spu_interpreter::fast::g_spu_opcode_table[op.opcode])));
	call->setRet(0, *addr);

	// leave the function immediately on a non-zero result
	c->test(*addr, *addr);
	c->jnz(*end);
	c->unuse(*addr);
}
|
|
|
|
void spu_recompiler::FunctionCall()
|
|
{
|
|
auto gate = [](SPUThread* _spu, u32 link) noexcept -> u32
|
|
{
|
|
_spu->recursion_level++;
|
|
|
|
try
|
|
{
|
|
// TODO: check correctness
|
|
|
|
if (_spu->pc & 0x4000000)
|
|
{
|
|
if (_spu->pc & 0x8000000)
|
|
{
|
|
throw EXCEPTION("Undefined behaviour");
|
|
}
|
|
|
|
_spu->set_interrupt_status(true);
|
|
_spu->pc &= ~0x4000000;
|
|
}
|
|
else if (_spu->pc & 0x8000000)
|
|
{
|
|
_spu->set_interrupt_status(false);
|
|
_spu->pc &= ~0x8000000;
|
|
}
|
|
|
|
if (_spu->pc == link)
|
|
{
|
|
LOG_ERROR(SPU, "Branch-to-next");
|
|
}
|
|
else if (_spu->pc == link - 4)
|
|
{
|
|
LOG_ERROR(SPU, "Branch-to-self");
|
|
}
|
|
|
|
while (!_spu->m_state || !_spu->check_status())
|
|
{
|
|
// Call override function directly since the type is known
|
|
static_cast<SPURecompilerDecoder&>(*_spu->m_dec).DecodeMemory(_spu->offset + _spu->pc);
|
|
|
|
if (_spu->m_state & CPU_STATE_RETURN)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (_spu->pc == link)
|
|
{
|
|
// returned successfully
|
|
_spu->recursion_level--;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
_spu->recursion_level--;
|
|
return 0x2000000 | _spu->pc;
|
|
}
|
|
catch (...)
|
|
{
|
|
_spu->pending_exception = std::current_exception();
|
|
|
|
_spu->recursion_level--;
|
|
return 0x1000000 | _spu->pc;
|
|
}
|
|
};
|
|
|
|
asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, u32(SPUThread*, u32)>(gate)), asmjit::kFuncConvHost, asmjit::FuncBuilder2<u32, SPUThread*, u32>());
|
|
call->setArg(0, *cpu);
|
|
call->setArg(1, asmjit::imm_u(spu_branch_target(m_pos + 4)));
|
|
call->setRet(0, *addr);
|
|
|
|
// return immediately if an error occured
|
|
c->test(*addr, *addr);
|
|
c->jnz(*end);
|
|
c->unuse(*addr);
|
|
}
|
|
|
|
// STOP has no native emitter yet; it is executed through the interpreter.
void spu_recompiler::STOP(spu_opcode_t op)
{
	// TODO
	this->InterpreterCall(op);
}
|
|
|
|
// LNOP: no host code is emitted.
void spu_recompiler::LNOP(spu_opcode_t op)
{
	// intentionally empty
}
|
|
|
|
// SYNC is emitted as a host memory fence.
void spu_recompiler::SYNC(spu_opcode_t op)
{
	// On the SPU this instruction must follow a store that modifies the instruction stream.
	c->mfence();
}
|
|
|
|
// DSYNC is emitted as a host memory fence.
void spu_recompiler::DSYNC(spu_opcode_t op)
{
	// On the SPU this instruction forces all earlier load, store, and channel instructions
	// to complete before proceeding.
	c->mfence();
}
|
|
|
|
// MFSPR is executed through the interpreter.
void spu_recompiler::MFSPR(spu_opcode_t op)
{
	this->InterpreterCall(op);
}
|
|
|
|
// RDCH has no native emitter yet; it is executed through the interpreter.
void spu_recompiler::RDCH(spu_opcode_t op)
{
	// TODO
	this->InterpreterCall(op);
}
|
|
|
|
// RCHCNT has no native emitter yet; it is executed through the interpreter.
void spu_recompiler::RCHCNT(spu_opcode_t op)
{
	// TODO
	this->InterpreterCall(op);
}
|
|
|
|
// Subtract from (word): rt = rb - ra in every 32-bit lane.
void spu_recompiler::SF(spu_opcode_t op)
{
	const auto src_a = SPU_OFF_128(gpr[op.ra]);
	const auto dest = SPU_OFF_128(gpr[op.rt]);

	const XmmLink& acc = XmmGet(op.rb, XmmType::Int);
	c->psubd(acc, src_a);
	c->movdqa(dest, acc);
}
|
|
|
|
// Bitwise OR: rt = rb | ra.
void spu_recompiler::OR(spu_opcode_t op)
{
	const auto src_a = SPU_OFF_128(gpr[op.ra]);
	const auto dest = SPU_OFF_128(gpr[op.rt]);

	const XmmLink& acc = XmmGet(op.rb, XmmType::Int);
	c->por(acc, src_a);
	c->movdqa(dest, acc);
}
|
|
|
|
// Per 32-bit lane: rt = 0 when ra > rb (unsigned), otherwise 1.
void spu_recompiler::BG(spu_opcode_t op)
{
	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	const XmmLink& rhs = XmmAlloc();

	// Flip the sign bit of both operands so the signed pcmpgtd acts as an unsigned compare
	c->movdqa(rhs, XmmConst(_mm_set1_epi32(0x80000000)));
	c->pxor(lhs, rhs);
	c->pxor(rhs, SPU_OFF_128(gpr[op.rb]));

	// lhs = (ra > rb) ? -1 : 0, then +1 turns that into 0 / 1
	c->pcmpgtd(lhs, rhs);
	c->paddd(lhs, XmmConst(_mm_set1_epi32(1)));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// Subtract from (halfword): rt = rb - ra in every 16-bit lane.
void spu_recompiler::SFH(spu_opcode_t op)
{
	const auto src_a = SPU_OFF_128(gpr[op.ra]);
	const auto dest = SPU_OFF_128(gpr[op.rt]);

	const XmmLink& acc = XmmGet(op.rb, XmmType::Int);
	c->psubw(acc, src_a);
	c->movdqa(dest, acc);
}
|
|
|
|
// Bitwise NOR: rt = ~(ra | rb).
void spu_recompiler::NOR(spu_opcode_t op)
{
	const XmmLink& acc = XmmGet(op.ra, XmmType::Int);
	c->por(acc, SPU_OFF_128(gpr[op.rb]));

	// XOR with all ones inverts every bit
	c->pxor(acc, XmmConst(_mm_set1_epi32(0xffffffff)));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// Absolute difference of unsigned bytes: rt = max(ra, rb) - min(ra, rb) in every byte lane.
void spu_recompiler::ABSDB(spu_opcode_t op)
{
	const XmmLink& hi = XmmGet(op.ra, XmmType::Int);
	const XmmLink& lo = XmmGet(op.rb, XmmType::Int);
	const XmmLink& saved_a = XmmAlloc();

	// keep a copy of ra because pmaxub overwrites it
	c->movdqa(saved_a, hi);
	c->pmaxub(hi, lo);
	c->pminub(lo, saved_a);
	c->psubb(hi, lo);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), hi);
}
|
|
|
|
// Rotate left (word): every 32-bit lane of ra is rotated left by the low 5 bits of the
// matching lane of rb and stored to rt. Implemented as a call to a host helper.
void spu_recompiler::ROT(spu_opcode_t op)
{
	auto body = [](u32* t, const u32* a, const s32* b) noexcept
	{
		for (u32 i = 0; i < 4; i++)
		{
			// Only the low 5 bits select the rotation; without the mask a count outside
			// 1..31 made one of the shifts undefined behaviour
			const u32 s = static_cast<u32>(b[i]) & 0x1f;

			// (0 - s) & 0x1f equals 32 - s for s != 0 and 0 for s == 0
			t[i] = (a[i] << s) | (a[i] >> ((0 - s) & 0x1f));
		}
	};

	// Pass the addresses of rt, ra and rb to the helper
	c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
	c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
	c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
	asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u32*, const u32*, const s32*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
	call->setArg(0, *qw0);
	call->setArg(1, *qw1);
	call->setArg(2, *qw2);

	//for (u32 i = 0; i < 4; i++) // unrolled loop
	//{
	//	c->mov(qw0->r32(), SPU_OFF_32(gpr[op.ra]._u32[i]));
	//	c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[i]));
	//	c->rol(qw0->r32(), *addr);
	//	c->mov(SPU_OFF_32(gpr[op.rt]._u32[i]), qw0->r32());
	//}
}
|
|
|
|
// Logical shift right (word): every 32-bit lane of ra is shifted right by (0 - rb) & 0x3f;
// counts of 32..63 give 0. Implemented as a call to a host helper.
void spu_recompiler::ROTM(spu_opcode_t op)
{
	auto body = [](u32* t, const u32* a, const u32* b) noexcept
	{
		for (u32 i = 0; i < 4; i++)
		{
			// The count is masked to 6 bits: shifting a 64-bit value by 64 or more is
			// undefined behaviour, and the unmasked (0 - b) could be any 32-bit value
			const u32 s = (0 - b[i]) & 0x3f;

			t[i] = static_cast<u32>(static_cast<u64>(a[i]) >> s);
		}
	};

	// Pass the addresses of rt, ra and rb to the helper
	c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
	c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
	c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
	asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u32*, const u32*, const u32*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
	call->setArg(0, *qw0);
	call->setArg(1, *qw1);
	call->setArg(2, *qw2);

	//for (u32 i = 0; i < 4; i++) // unrolled loop
	//{
	//	c->mov(qw0->r32(), SPU_OFF_32(gpr[op.ra]._u32[i]));
	//	c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[i]));
	//	c->neg(*addr);
	//	c->shr(*qw0, *addr);
	//	c->mov(SPU_OFF_32(gpr[op.rt]._u32[i]), qw0->r32());
	//}
}
|
|
|
|
// Arithmetic shift right (word): every 32-bit lane of ra is shifted right, sign-filling,
// by (0 - rb) & 0x3f. Implemented as a call to a host helper.
void spu_recompiler::ROTMA(spu_opcode_t op)
{
	auto body = [](s32* t, const s32* a, const u32* b) noexcept
	{
		for (u32 i = 0; i < 4; i++)
		{
			// The count is masked to 6 bits: shifting a 64-bit value by 64 or more is
			// undefined behaviour, and the unmasked (0 - b) could be any 32-bit value
			const u32 s = (0 - b[i]) & 0x3f;

			// The value is sign-extended to 64 bits so counts of 32..63 yield the sign fill
			t[i] = static_cast<s32>(static_cast<s64>(a[i]) >> s);
		}
	};

	// Pass the addresses of rt, ra and rb to the helper
	c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
	c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
	c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
	asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(s32*, const s32*, const u32*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
	call->setArg(0, *qw0);
	call->setArg(1, *qw1);
	call->setArg(2, *qw2);

	//for (u32 i = 0; i < 4; i++) // unrolled loop
	//{
	//	c->movsxd(*qw0, SPU_OFF_32(gpr[op.ra]._u32[i]));
	//	c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[i]));
	//	c->neg(*addr);
	//	c->sar(*qw0, *addr);
	//	c->mov(SPU_OFF_32(gpr[op.rt]._u32[i]), qw0->r32());
	//}
}
|
|
|
|
// Shift left (word): every 32-bit lane of ra is shifted left by rb & 0x3f;
// counts of 32..63 give 0. Implemented as a call to a host helper.
void spu_recompiler::SHL(spu_opcode_t op)
{
	auto body = [](u32* t, const u32* a, const u32* b) noexcept
	{
		for (u32 i = 0; i < 4; i++)
		{
			// The count is masked to 6 bits: shifting a 64-bit value by 64 or more is
			// undefined behaviour, and the unmasked b could be any 32-bit value
			const u32 s = b[i] & 0x3f;

			t[i] = static_cast<u32>(static_cast<u64>(a[i]) << s);
		}
	};

	// Pass the addresses of rt, ra and rb to the helper
	c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
	c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
	c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
	asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u32*, const u32*, const u32*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
	call->setArg(0, *qw0);
	call->setArg(1, *qw1);
	call->setArg(2, *qw2);

	//for (u32 i = 0; i < 4; i++) // unrolled loop
	//{
	//	c->mov(qw0->r32(), SPU_OFF_32(gpr[op.ra]._u32[i]));
	//	c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[i]));
	//	c->shl(*qw0, *addr);
	//	c->mov(SPU_OFF_32(gpr[op.rt]._u32[i]), qw0->r32());
	//}
}
|
|
|
|
// Rotate left (halfword): every 16-bit lane of ra is rotated left by the low 4 bits of the
// matching lane of rb and stored to rt. Implemented as a call to a host helper.
void spu_recompiler::ROTH(spu_opcode_t op) //nf
{
	auto body = [](u16* t, const u16* a, const s16* b) noexcept
	{
		for (u32 i = 0; i < 8; i++)
		{
			// Only the low 4 bits select the rotation; without the mask a negative or
			// large count made the shifts undefined behaviour
			const u32 s = static_cast<u32>(b[i]) & 0xf;

			// a[i] is promoted to int, so a right shift by 16 (s == 0) is valid and gives 0
			t[i] = static_cast<u16>((a[i] << s) | (a[i] >> (16 - s)));
		}
	};

	// Pass the addresses of rt, ra and rb to the helper
	c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
	c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
	c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
	asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u16*, const u16*, const s16*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
	call->setArg(0, *qw0);
	call->setArg(1, *qw1);
	call->setArg(2, *qw2);

	//for (u32 i = 0; i < 8; i++) // unrolled loop
	//{
	//	c->movzx(qw0->r32(), SPU_OFF_16(gpr[op.ra]._u16[i]));
	//	c->movzx(*addr, SPU_OFF_16(gpr[op.rb]._u16[i]));
	//	c->rol(qw0->r16(), *addr);
	//	c->mov(SPU_OFF_16(gpr[op.rt]._u16[i]), qw0->r16());
	//}
}
|
|
|
|
// Logical shift right (halfword): every 16-bit lane of ra is shifted right by (0 - rb) & 0x1f;
// counts of 16..31 give 0. Implemented as a call to a host helper.
void spu_recompiler::ROTHM(spu_opcode_t op)
{
	auto body = [](u16* t, const u16* a, const u16* b) noexcept
	{
		for (u32 i = 0; i < 8; i++)
		{
			// b[i] is promoted to int, so the unmasked (0 - b[i]) was a negative shift count
			// (undefined behaviour); the count is computed unsigned and masked to 5 bits
			const u32 s = (0u - b[i]) & 0x1f;

			t[i] = static_cast<u16>(static_cast<u32>(a[i]) >> s);
		}
	};

	// Pass the addresses of rt, ra and rb to the helper
	c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
	c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
	c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
	asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u16*, const u16*, const u16*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
	call->setArg(0, *qw0);
	call->setArg(1, *qw1);
	call->setArg(2, *qw2);

	//for (u32 i = 0; i < 8; i++) // unrolled loop
	//{
	//	c->movzx(qw0->r32(), SPU_OFF_16(gpr[op.ra]._u16[i]));
	//	c->movzx(*addr, SPU_OFF_16(gpr[op.rb]._u16[i]));
	//	c->neg(*addr);
	//	c->shr(qw0->r32(), *addr);
	//	c->mov(SPU_OFF_16(gpr[op.rt]._u16[i]), qw0->r16());
	//}
}
|
|
|
|
// Arithmetic shift right (halfword): every 16-bit lane of ra is shifted right, sign-filling,
// by (0 - rb) & 0x1f. Implemented as a call to a host helper.
void spu_recompiler::ROTMAH(spu_opcode_t op)
{
	auto body = [](s16* t, const s16* a, const u16* b) noexcept
	{
		for (u32 i = 0; i < 8; i++)
		{
			// b[i] is promoted to int, so the unmasked (0 - b[i]) was a negative shift count
			// (undefined behaviour); the count is computed unsigned and masked to 5 bits
			const u32 s = (0u - b[i]) & 0x1f;

			// The value is sign-extended to 32 bits so counts of 16..31 yield the sign fill
			t[i] = static_cast<s16>(static_cast<s32>(a[i]) >> s);
		}
	};

	// Pass the addresses of rt, ra and rb to the helper
	c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
	c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
	c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
	asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(s16*, const s16*, const u16*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
	call->setArg(0, *qw0);
	call->setArg(1, *qw1);
	call->setArg(2, *qw2);

	//for (u32 i = 0; i < 8; i++) // unrolled loop
	//{
	//	c->movsx(qw0->r32(), SPU_OFF_16(gpr[op.ra]._u16[i]));
	//	c->movzx(*addr, SPU_OFF_16(gpr[op.rb]._u16[i]));
	//	c->neg(*addr);
	//	c->sar(qw0->r32(), *addr);
	//	c->mov(SPU_OFF_16(gpr[op.rt]._u16[i]), qw0->r16());
	//}
}
|
|
|
|
// Shift left (halfword): every 16-bit lane of ra is shifted left by rb & 0x1f;
// counts of 16..31 give 0. Implemented as a call to a host helper.
void spu_recompiler::SHLH(spu_opcode_t op)
{
	auto body = [](u16* t, const u16* a, const u16* b) noexcept
	{
		for (u32 i = 0; i < 8; i++)
		{
			// The count is masked to 5 bits: shifting a 32-bit value by 32 or more is
			// undefined behaviour, and the unmasked b could be as large as 0xffff
			const u32 s = b[i] & 0x1f;

			t[i] = static_cast<u16>(static_cast<u32>(a[i]) << s);
		}
	};

	// Pass the addresses of rt, ra and rb to the helper
	c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
	c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
	c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
	asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u16*, const u16*, const u16*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
	call->setArg(0, *qw0);
	call->setArg(1, *qw1);
	call->setArg(2, *qw2);

	//for (u32 i = 0; i < 8; i++) // unrolled loop
	//{
	//	c->movzx(qw0->r32(), SPU_OFF_16(gpr[op.ra]._u16[i]));
	//	c->movzx(*addr, SPU_OFF_16(gpr[op.rb]._u16[i]));
	//	c->shl(qw0->r32(), *addr);
	//	c->mov(SPU_OFF_16(gpr[op.rt]._u16[i]), qw0->r16());
	//}
}
|
|
|
|
// Rotate left immediate (word): rt = ra rotated left by i7 & 0x1f in every 32-bit lane.
void spu_recompiler::ROTI(spu_opcode_t op)
{
	const int left = op.i7 & 0x1f;

	const XmmLink& hi_part = XmmGet(op.ra, XmmType::Int);
	const XmmLink& lo_part = XmmAlloc();

	// Combine (ra << left) with (ra >> (32 - left))
	c->movdqa(lo_part, hi_part);
	c->pslld(hi_part, left);
	c->psrld(lo_part, 32 - left);
	c->por(hi_part, lo_part);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), hi_part);
}
|
|
|
|
// Logical shift right immediate (word): rt = ra >> ((0 - i7) & 0x3f) in every 32-bit lane.
void spu_recompiler::ROTMI(spu_opcode_t op)
{
	const int count = (0 - op.i7) & 0x3f;

	const XmmLink& value = XmmGet(op.ra, XmmType::Int);
	c->psrld(value, count);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// Arithmetic shift right immediate (word): psrad by (0 - i7) & 0x3f in every 32-bit lane.
void spu_recompiler::ROTMAI(spu_opcode_t op)
{
	const int count = (0 - op.i7) & 0x3f;

	const XmmLink& value = XmmGet(op.ra, XmmType::Int);
	c->psrad(value, count);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// Shift left immediate (word): rt = ra << (i7 & 0x3f) in every 32-bit lane.
void spu_recompiler::SHLI(spu_opcode_t op)
{
	const int count = op.i7 & 0x3f;

	const XmmLink& value = XmmGet(op.ra, XmmType::Int);
	c->pslld(value, count);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// Rotate left immediate (halfword): rt = ra rotated left by i7 & 0xf in every 16-bit lane.
void spu_recompiler::ROTHI(spu_opcode_t op)
{
	const int left = op.i7 & 0xf;

	const XmmLink& hi_part = XmmGet(op.ra, XmmType::Int);
	const XmmLink& lo_part = XmmAlloc();

	// Combine (ra << left) with (ra >> (16 - left))
	c->movdqa(lo_part, hi_part);
	c->psllw(hi_part, left);
	c->psrlw(lo_part, 16 - left);
	c->por(hi_part, lo_part);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), hi_part);
}
|
|
|
|
// Logical shift right immediate (halfword): rt = ra >> ((0 - i7) & 0x1f) in every 16-bit lane.
void spu_recompiler::ROTHMI(spu_opcode_t op)
{
	const int count = (0 - op.i7) & 0x1f;

	const XmmLink& value = XmmGet(op.ra, XmmType::Int);
	c->psrlw(value, count);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// Arithmetic shift right immediate (halfword): psraw by (0 - i7) & 0x1f in every 16-bit lane.
void spu_recompiler::ROTMAHI(spu_opcode_t op)
{
	const int count = (0 - op.i7) & 0x1f;

	const XmmLink& value = XmmGet(op.ra, XmmType::Int);
	c->psraw(value, count);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// Shift left immediate (halfword): rt = ra << (i7 & 0x1f) in every 16-bit lane.
void spu_recompiler::SHLHI(spu_opcode_t op)
{
	const int count = op.i7 & 0x1f;

	const XmmLink& value = XmmGet(op.ra, XmmType::Int);
	c->psllw(value, count);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// Add (word): rt = rb + ra in every 32-bit lane.
void spu_recompiler::A(spu_opcode_t op)
{
	const auto src_a = SPU_OFF_128(gpr[op.ra]);
	const auto dest = SPU_OFF_128(gpr[op.rt]);

	const XmmLink& acc = XmmGet(op.rb, XmmType::Int);
	c->paddd(acc, src_a);
	c->movdqa(dest, acc);
}
|
|
|
|
// Bitwise AND: rt = rb & ra.
void spu_recompiler::AND(spu_opcode_t op)
{
	const auto src_a = SPU_OFF_128(gpr[op.ra]);
	const auto dest = SPU_OFF_128(gpr[op.rt]);

	const XmmLink& acc = XmmGet(op.rb, XmmType::Int);
	c->pand(acc, src_a);
	c->movdqa(dest, acc);
}
|
|
|
|
// Per 32-bit lane: rt = 1 when ra > (ra + rb) as unsigned values (the addition wrapped), otherwise 0.
void spu_recompiler::CG(spu_opcode_t op)
{
	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	const XmmLink& sum = XmmGet(op.rb, XmmType::Int);
	const XmmLink& sign = XmmAlloc();

	c->movdqa(sign, XmmConst(_mm_set1_epi32(0x80000000)));
	c->paddd(sum, lhs);

	// Flip the sign bit of both operands so the signed pcmpgtd acts as an unsigned compare
	c->pxor(lhs, sign);
	c->pxor(sum, sign);
	c->pcmpgtd(lhs, sum);

	// Reduce the all-ones compare mask to 1
	c->psrld(lhs, 31);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// Add (halfword): rt = ra + rb in every 16-bit lane.
void spu_recompiler::AH(spu_opcode_t op)
{
	const auto src_b = SPU_OFF_128(gpr[op.rb]);
	const auto dest = SPU_OFF_128(gpr[op.rt]);

	const XmmLink& acc = XmmGet(op.ra, XmmType::Int);
	c->paddw(acc, src_b);
	c->movdqa(dest, acc);
}
|
|
|
|
// Bitwise NAND: rt = ~(ra & rb).
void spu_recompiler::NAND(spu_opcode_t op)
{
	const XmmLink& acc = XmmGet(op.ra, XmmType::Int);
	c->pand(acc, SPU_OFF_128(gpr[op.rb]));

	// XOR with all ones inverts every bit
	c->pxor(acc, XmmConst(_mm_set1_epi32(0xffffffff)));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// Average bytes: rt = pavgb(rb, ra) in every byte lane.
void spu_recompiler::AVGB(spu_opcode_t op)
{
	const auto src_a = SPU_OFF_128(gpr[op.ra]);
	const auto dest = SPU_OFF_128(gpr[op.rt]);

	const XmmLink& acc = XmmGet(op.rb, XmmType::Int);
	c->pavgb(acc, src_a);
	c->movdqa(dest, acc);
}
|
|
|
|
// MTSPR is executed through the interpreter.
void spu_recompiler::MTSPR(spu_opcode_t op)
{
	this->InterpreterCall(op);
}
|
|
|
|
// WRCH has no native emitter yet; it is executed through the interpreter.
void spu_recompiler::WRCH(spu_opcode_t op)
{
	// TODO
	this->InterpreterCall(op);
}
|
|
|
|
// Branch indirect if zero: jumps to the jump table resolver with the target from ra
// when word 3 of rt is zero.
void spu_recompiler::BIZ(spu_opcode_t op)
{
	// Target address: word 3 of ra, masked with 0x3fffc
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0x3fffc);

	// interrupt flags neutralize jump table
	if (op.d || op.e)
	{
		c->or_(*addr, op.e << 26 | op.d << 27);
	}

	c->cmp(SPU_OFF_32(gpr[op.rt]._u32[3]), 0);
	c->je(*jt);
	c->unuse(*addr);
}
|
|
|
|
// Branch indirect if not zero: jumps to the jump table resolver with the target from ra
// when word 3 of rt is non-zero.
void spu_recompiler::BINZ(spu_opcode_t op)
{
	// Target address: word 3 of ra, masked with 0x3fffc
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0x3fffc);

	// interrupt flags neutralize jump table
	if (op.d || op.e)
	{
		c->or_(*addr, op.e << 26 | op.d << 27);
	}

	c->cmp(SPU_OFF_32(gpr[op.rt]._u32[3]), 0);
	c->jne(*jt);
	c->unuse(*addr);
}
|
|
|
|
// Branch indirect if zero halfword: jumps to the jump table resolver with the target from ra
// when halfword 6 of rt is zero.
void spu_recompiler::BIHZ(spu_opcode_t op)
{
	// Target address: word 3 of ra, masked with 0x3fffc
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0x3fffc);

	// interrupt flags neutralize jump table
	if (op.d || op.e)
	{
		c->or_(*addr, op.e << 26 | op.d << 27);
	}

	c->cmp(SPU_OFF_16(gpr[op.rt]._u16[6]), 0);
	c->je(*jt);
	c->unuse(*addr);
}
|
|
|
|
// Branch indirect if not zero halfword: jumps to the jump table resolver with the target from ra
// when halfword 6 of rt is non-zero.
void spu_recompiler::BIHNZ(spu_opcode_t op)
{
	// Target address: word 3 of ra, masked with 0x3fffc
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0x3fffc);

	// interrupt flags neutralize jump table
	if (op.d || op.e)
	{
		c->or_(*addr, op.e << 26 | op.d << 27);
	}

	c->cmp(SPU_OFF_16(gpr[op.rt]._u16[6]), 0);
	c->jne(*jt);
	c->unuse(*addr);
}
|
|
|
|
// STOPD is executed through the interpreter.
void spu_recompiler::STOPD(spu_opcode_t op)
{
	this->InterpreterCall(op);
}
|
|
|
|
// Store quadword (x-form): stores rt, byte-reversed, to local storage at
// (ra.word3 + rb.word3) & 0x3fff0.
void spu_recompiler::STQX(spu_opcode_t op)
{
	// Effective address, aligned down to 16 bytes
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->add(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
	c->and_(*addr, 0x3fff0);

	const XmmLink& value = XmmGet(op.rt, XmmType::Int);

	// Reverse the order of all 16 bytes before the store
	c->pshufb(value, XmmConst(_mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f)));
	c->movdqa(asmjit::host::oword_ptr(*ls, *addr), value);
	c->unuse(*addr);
}
|
|
|
|
// Branch indirect: unconditionally jumps to the jump table resolver with the target from ra.
void spu_recompiler::BI(spu_opcode_t op)
{
	// Target address: word 3 of ra, masked with 0x3fffc
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0x3fffc);

	// interrupt flags neutralize jump table
	if (op.d || op.e)
	{
		c->or_(*addr, op.e << 26 | op.d << 27);
	}

	c->jmp(*jt);
}
|
|
|
|
// Branch indirect and set link: stores the target from ra to pc, writes the link address
// (spu_branch_target(m_pos + 4)) to word 3 of rt with the other words zero, then emits FunctionCall().
void spu_recompiler::BISL(spu_opcode_t op)
{
	// Target address: word 3 of ra, masked with 0x3fffc
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0x3fffc);

	// interrupt flags stored to PC
	if (op.d || op.e)
	{
		c->or_(*addr, op.e << 26 | op.d << 27);
	}

	c->mov(SPU_OFF_32(pc), *addr);
	c->unuse(*addr);

	// Link register value
	const XmmLink& link_value = XmmAlloc();
	c->movdqa(link_value, XmmConst(_mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0)));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), link_value);
	c->unuse(link_value);

	FunctionCall();
}
|
|
|
|
// IRET is not supported by the recompiler; compiling it throws.
void spu_recompiler::IRET(spu_opcode_t op)
{
	// No host code is emitted for this instruction
	throw EXCEPTION("Unimplemented instruction");
}
|
|
|
|
// BISLED is not supported by the recompiler; compiling it throws.
void spu_recompiler::BISLED(spu_opcode_t op)
{
	// No host code is emitted for this instruction
	throw EXCEPTION("Unimplemented instruction");
}
|
|
|
|
// HBR: no host code is emitted.
void spu_recompiler::HBR(spu_opcode_t op)
{
	// intentionally empty
}
|
|
|
|
// Gather bits from words: collects bit 0 of every 32-bit lane of ra into a mask that is
// written to halfword 6 of rt; the rest of rt is zero.
void spu_recompiler::GB(spu_opcode_t op)
{
	const XmmLink& bits = XmmGet(op.ra, XmmType::Int);

	// Move bytes 0, 4, 8, 12 to bytes 0..3 and zero the others
	c->pshufb(bits, XmmConst(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0)));

	// Bring bit 0 of each byte into bit 7 so pmovmskb can collect it
	c->psllq(bits, 7);
	c->pmovmskb(*addr, bits);

	// Build the result: zero vector with the mask inserted as halfword 6
	c->pxor(bits, bits);
	c->pinsrw(bits, *addr, 6);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), bits);
	c->unuse(*addr);
}
|
|
|
|
// Gather bits from halfwords: collects bit 0 of every 16-bit lane of ra into a mask that is
// written to halfword 6 of rt; the rest of rt is zero.
void spu_recompiler::GBH(spu_opcode_t op)
{
	const XmmLink& bits = XmmGet(op.ra, XmmType::Int);

	// Move the even bytes to bytes 0..7 and zero the others
	c->pshufb(bits, XmmConst(_mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 14, 12, 10, 8, 6, 4, 2, 0)));

	// Bring bit 0 of each byte into bit 7 so pmovmskb can collect it
	c->psllq(bits, 7);
	c->pmovmskb(*addr, bits);

	// Build the result: zero vector with the mask inserted as halfword 6
	c->pxor(bits, bits);
	c->pinsrw(bits, *addr, 6);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), bits);
	c->unuse(*addr);
}
|
|
|
|
// Gather bits from bytes: collects bit 0 of every byte of ra into a 16-bit mask that is
// written to halfword 6 of rt; the rest of rt is zero.
void spu_recompiler::GBB(spu_opcode_t op)
{
	const XmmLink& bits = XmmGet(op.ra, XmmType::Int);

	// Bring bit 0 of each byte into bit 7 so pmovmskb can collect it
	c->psllq(bits, 7);
	c->pmovmskb(*addr, bits);

	// Build the result: zero vector with the mask inserted as halfword 6
	c->pxor(bits, bits);
	c->pinsrw(bits, *addr, 6);
	c->movdqa(SPU_OFF_128(gpr[op.rt]), bits);
	c->unuse(*addr);
}
|
|
|
|
// Form select mask (words): rt = g_spu_imm.fsm[ra.word3 & 0xf], loaded as a 16-byte table entry.
void spu_recompiler::FSM(spu_opcode_t op)
{
	const XmmLink& mask = XmmAlloc();

	c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsm));

	// Table index scaled to a byte offset (16 bytes per entry)
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0xf);
	c->shl(*addr, 4);

	c->movdqa(mask, asmjit::host::oword_ptr(*qw0, *addr));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), mask);
	c->unuse(*addr);
	c->unuse(*qw0);
}
|
|
|
|
// Form select mask (halfwords): rt = g_spu_imm.fsmh[ra.word3 & 0xff], loaded as a 16-byte table entry.
void spu_recompiler::FSMH(spu_opcode_t op)
{
	const XmmLink& mask = XmmAlloc();

	c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsmh));

	// Table index scaled to a byte offset (16 bytes per entry)
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0xff);
	c->shl(*addr, 4);

	c->movdqa(mask, asmjit::host::oword_ptr(*qw0, *addr));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), mask);
	c->unuse(*addr);
	c->unuse(*qw0);
}
|
|
|
|
// Form select mask (bytes): rt = g_spu_imm.fsmb[ra.word3 & 0xffff], loaded as a 16-byte table entry.
void spu_recompiler::FSMB(spu_opcode_t op)
{
	const XmmLink& mask = XmmAlloc();

	c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.fsmb));

	// Table index scaled to a byte offset (16 bytes per entry)
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->and_(*addr, 0xffff);
	c->shl(*addr, 4);

	c->movdqa(mask, asmjit::host::oword_ptr(*qw0, *addr));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), mask);
	c->unuse(*addr);
	c->unuse(*qw0);
}
|
|
|
|
// Floating reciprocal estimate: rt = rcpps(ra).
void spu_recompiler::FREST(spu_opcode_t op)
{
	const auto dest = SPU_OFF_128(gpr[op.rt]);

	const XmmLink& value = XmmGet(op.ra, XmmType::Float);
	c->rcpps(value, value);
	c->movaps(dest, value);
}
|
|
|
|
// Floating reciprocal square root estimate: rt = rsqrtps(|ra|).
void spu_recompiler::FRSQEST(spu_opcode_t op)
{
	const XmmLink& value = XmmGet(op.ra, XmmType::Float);

	// Clear the sign bit of every lane (absolute value)
	c->andps(value, XmmConst(_mm_set1_epi32(0x7fffffff)));
	c->rsqrtps(value, value);
	c->movaps(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// Load quadword (x-form): loads 16 bytes from local storage at (ra.word3 + rb.word3) & 0x3fff0,
// reverses their byte order and stores them to rt.
void spu_recompiler::LQX(spu_opcode_t op)
{
	// Effective address, aligned down to 16 bytes
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->add(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
	c->and_(*addr, 0x3fff0);

	const XmmLink& value = XmmAlloc();
	c->movdqa(value, asmjit::host::oword_ptr(*ls, *addr));

	// Reverse the order of all 16 bytes after the load
	c->pshufb(value, XmmConst(_mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f)));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
	c->unuse(*addr);
}
|
|
|
|
// Shuffles ra with the g_spu_imm.rldq_pshufb entry selected by bits 3..6 of rb.word3
// and stores the result to rt.
void spu_recompiler::ROTQBYBI(spu_opcode_t op)
{
	const XmmLink& value = XmmGet(op.ra, XmmType::Int);

	c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.rldq_pshufb));

	// ((rb >> 3) & 0xf) * 16, computed as (rb & (0xf << 3)) << 1
	c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
	c->and_(*addr, 0xf << 3);
	c->shl(*addr, 1);

	c->pshufb(value, asmjit::host::oword_ptr(*qw0, *addr));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
	c->unuse(*addr);
	c->unuse(*qw0);
}
|
|
|
|
// Shuffles ra with the g_spu_imm.srdq_pshufb entry selected by (0 - (rb.word3 >> 3)) & 0x1f
// and stores the result to rt.
void spu_recompiler::ROTQMBYBI(spu_opcode_t op)
{
	const XmmLink& value = XmmGet(op.ra, XmmType::Int);

	c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.srdq_pshufb));

	// Table index from the negated byte count, scaled to a byte offset (16 bytes per entry)
	c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
	c->shr(*addr, 3);
	c->neg(*addr);
	c->and_(*addr, 0x1f);
	c->shl(*addr, 4);

	c->pshufb(value, asmjit::host::oword_ptr(*qw0, *addr));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
	c->unuse(*addr);
	c->unuse(*qw0);
}
|
|
|
|
// Shuffles ra with the g_spu_imm.sldq_pshufb entry selected by bits 3..7 of rb.word3
// and stores the result to rt.
void spu_recompiler::SHLQBYBI(spu_opcode_t op)
{
	const XmmLink& value = XmmGet(op.ra, XmmType::Int);

	c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.sldq_pshufb));

	// ((rb >> 3) & 0x1f) * 16, computed as (rb & (0x1f << 3)) << 1
	c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
	c->and_(*addr, 0x1f << 3);
	c->shl(*addr, 1);

	c->pshufb(value, asmjit::host::oword_ptr(*qw0, *addr));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
	c->unuse(*addr);
	c->unuse(*qw0);
}
|
|
|
|
// Generate controls for byte insertion (x-form): fills rt with the pattern 0x10..0x1f and
// writes 0x03 into the byte at offset ~(rb.word3 + ra.word3) & 0xf.
void spu_recompiler::CBX(spu_opcode_t op)
{
	// Byte offset inside rt
	c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
	c->add(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
	c->not_(*addr);
	c->and_(*addr, 0xf);

	// Base pattern
	const XmmLink& pattern = XmmAlloc();
	c->movdqa(pattern, XmmConst(_mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f)));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), pattern);

	// Overwrite the selected byte directly in rt
	c->mov(asmjit::host::byte_ptr(*cpu, *addr, 0, OFFSET_OF(SPUThread, gpr[op.rt])), 0x03);
	c->unuse(*addr);
}
|
|
|
|
// CHX: writes the constant 0x10..0x1f pattern into gpr[rt], then overwrites the
// 16-bit slot at byte offset (~(rb._u32[3] + ra._u32[3])) & 0xe with 0x0203.
void spu_recompiler::CHX(spu_opcode_t op)
{
    const auto base_pattern = _mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // addr = even byte offset inside the destination register
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->add(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
    c->not_(*addr);
    c->and_(*addr, 0xe);

    const XmmLink& pattern = XmmAlloc();
    c->movdqa(pattern, XmmConst(base_pattern));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), pattern);

    // patch two bytes of the stored pattern in memory
    c->mov(asmjit::host::word_ptr(*cpu, *addr, 0, OFFSET_OF(SPUThread, gpr[op.rt])), 0x0203);
    c->unuse(*addr);
}
|
|
|
|
// CWX: writes the constant 0x10..0x1f pattern into gpr[rt], then overwrites the
// 32-bit slot at byte offset (~(rb._u32[3] + ra._u32[3])) & 0xc with 0x00010203.
void spu_recompiler::CWX(spu_opcode_t op)
{
    const auto base_pattern = _mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // addr = 4-byte aligned offset inside the destination register
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->add(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
    c->not_(*addr);
    c->and_(*addr, 0xc);

    const XmmLink& pattern = XmmAlloc();
    c->movdqa(pattern, XmmConst(base_pattern));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), pattern);

    // patch four bytes of the stored pattern in memory
    c->mov(asmjit::host::dword_ptr(*cpu, *addr, 0, OFFSET_OF(SPUThread, gpr[op.rt])), 0x00010203);
    c->unuse(*addr);
}
|
|
|
|
// CDX: writes the constant 0x10..0x1f pattern into gpr[rt], then overwrites the
// 64-bit half at byte offset (~(rb._u32[3] + ra._u32[3])) & 0x8 with 0x0001020304050607.
void spu_recompiler::CDX(spu_opcode_t op)
{
    const auto base_pattern = _mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // addr = 0 or 8: which half of the destination register gets patched
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->add(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
    c->not_(*addr);
    c->and_(*addr, 0x8);

    const XmmLink& pattern = XmmAlloc();
    c->movdqa(pattern, XmmConst(base_pattern));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), pattern);

    // the 64-bit value goes through qw0 before being stored
    c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
    c->mov(asmjit::host::qword_ptr(*cpu, *addr, 0, OFFSET_OF(SPUThread, gpr[op.rt])), *qw0);
    c->unuse(*addr);
    c->unuse(*qw0);
}
|
|
|
|
// ROTQBI: rotates the 128-bit value of gpr[ra] left by (rb._u32[3] & 7) bits
// using two 64-bit double shifts and stores the result into gpr[rt].
void spu_recompiler::ROTQBI(spu_opcode_t op)
{
    // qw0 = low half, qw1 = high half, qw2 = untouched copy of the low half
    c->mov(*qw0, SPU_OFF_64(gpr[op.ra]._u64[0]));
    c->mov(*qw1, SPU_OFF_64(gpr[op.ra]._u64[1]));
    c->mov(*qw2, *qw0);

    // addr = bit count
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->and_(*addr, 7);

    // low half pulls bits from the high half; high half pulls bits from the saved low half
    c->shld(*qw0, *qw1, *addr);
    c->shld(*qw1, *qw2, *addr);

    c->mov(SPU_OFF_64(gpr[op.rt]._u64[0]), *qw0);
    c->mov(SPU_OFF_64(gpr[op.rt]._u64[1]), *qw1);
    c->unuse(*addr);
    c->unuse(*qw0);
    c->unuse(*qw1);
    c->unuse(*qw2);
}
|
|
|
|
// ROTQMBI: shifts the 128-bit value of gpr[ra] right by ((0 - rb._u32[3]) & 7) bits,
// filling with zeros, and stores the result into gpr[rt].
void spu_recompiler::ROTQMBI(spu_opcode_t op)
{
    // qw0 = low half, qw1 = high half
    c->mov(*qw0, SPU_OFF_64(gpr[op.ra]._u64[0]));
    c->mov(*qw1, SPU_OFF_64(gpr[op.ra]._u64[1]));

    // addr = negated bit count, limited to 0..7
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->neg(*addr);
    c->and_(*addr, 7);

    // low half pulls bits from the high half, then the high half is shifted on its own
    c->shrd(*qw0, *qw1, *addr);
    c->shr(*qw1, *addr);

    c->mov(SPU_OFF_64(gpr[op.rt]._u64[0]), *qw0);
    c->mov(SPU_OFF_64(gpr[op.rt]._u64[1]), *qw1);
    c->unuse(*addr);
    c->unuse(*qw0);
    c->unuse(*qw1);
}
|
|
|
|
// SHLQBI: shifts the 128-bit value of gpr[ra] left by (rb._u32[3] & 7) bits,
// filling with zeros, and stores the result into gpr[rt].
void spu_recompiler::SHLQBI(spu_opcode_t op)
{
    // qw0 = low half, qw1 = high half
    c->mov(*qw0, SPU_OFF_64(gpr[op.ra]._u64[0]));
    c->mov(*qw1, SPU_OFF_64(gpr[op.ra]._u64[1]));

    // addr = bit count
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->and_(*addr, 7);

    // high half pulls bits from the low half, then the low half is shifted on its own
    c->shld(*qw1, *qw0, *addr);
    c->shl(*qw0, *addr);

    c->mov(SPU_OFF_64(gpr[op.rt]._u64[0]), *qw0);
    c->mov(SPU_OFF_64(gpr[op.rt]._u64[1]), *qw1);
    c->unuse(*addr);
    c->unuse(*qw0);
    c->unuse(*qw1);
}
|
|
|
|
// ROTQBY: shuffles gpr[ra] with the g_spu_imm.rldq_pshufb entry selected by
// (rb._u32[3] & 0xf) and stores the result into gpr[rt].
void spu_recompiler::ROTQBY(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Int);

    // qw0 = table base
    c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.rldq_pshufb));

    // addr = (count & 0xf) * 16
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->and_(*addr, 0xf);
    c->shl(*addr, 4);

    c->pshufb(value, asmjit::host::oword_ptr(*qw0, *addr));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
    c->unuse(*addr);
    c->unuse(*qw0);
}
|
|
|
|
// ROTQMBY: shuffles gpr[ra] with the g_spu_imm.srdq_pshufb entry selected by
// ((0 - rb._u32[3]) & 0x1f) and stores the result into gpr[rt].
void spu_recompiler::ROTQMBY(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Int);

    // qw0 = table base
    c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.srdq_pshufb));

    // addr = ((0 - count) & 0x1f) * 16
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->neg(*addr);
    c->and_(*addr, 0x1f);
    c->shl(*addr, 4);

    c->pshufb(value, asmjit::host::oword_ptr(*qw0, *addr));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
    c->unuse(*addr);
    c->unuse(*qw0);
}
|
|
|
|
// SHLQBY: shuffles gpr[ra] with the g_spu_imm.sldq_pshufb entry selected by
// (rb._u32[3] & 0x1f) and stores the result into gpr[rt].
void spu_recompiler::SHLQBY(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Int);

    // qw0 = table base
    c->mov(*qw0, asmjit::imm_ptr((void*)g_spu_imm.sldq_pshufb));

    // addr = (count & 0x1f) * 16
    c->mov(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));
    c->and_(*addr, 0x1f);
    c->shl(*addr, 4);

    c->pshufb(value, asmjit::host::oword_ptr(*qw0, *addr));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
    c->unuse(*addr);
    c->unuse(*qw0);
}
|
|
|
|
// ORX: ORs the four 32-bit lanes of gpr[ra] together, stores the result in
// rt._u32[3] and zeroes rt._u32[0..2].
void spu_recompiler::ORX(spu_opcode_t op)
{
    // accumulate lane 0, then OR in lanes 1..3
    c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[0]));

    for (u32 lane = 1; lane < 4; lane++)
    {
        c->or_(*addr, SPU_OFF_32(gpr[op.ra]._u32[lane]));
    }

    c->mov(SPU_OFF_32(gpr[op.rt]._u32[3]), *addr);

    // clear the remaining lanes of the destination
    c->xor_(*addr, *addr);

    for (u32 lane = 0; lane < 3; lane++)
    {
        c->mov(SPU_OFF_32(gpr[op.rt]._u32[lane]), *addr);
    }

    c->unuse(*addr);
}
|
|
|
|
// CBD: writes the constant 0x10..0x1f pattern into gpr[rt], then overwrites the
// single byte at index (~(ra._u32[3] + i7)) & 0xf with 0x03.
//
// NOTE: a disabled fast path used to be sketched here for op.ra == 1; it
// precomputed the whole pattern at compile time (value.u8r[op.i7 & 0xf] = 0x03)
// under the assumption that SP % 16 is always zero.
void spu_recompiler::CBD(spu_opcode_t op)
{
    const auto base_pattern = _mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // addr = byte index inside the destination register
    c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));

    if (op.i7)
    {
        c->add(*addr, op.i7);
    }

    c->not_(*addr);
    c->and_(*addr, 0xf);

    const XmmLink& pattern = XmmAlloc();
    c->movdqa(pattern, XmmConst(base_pattern));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), pattern);

    // patch one byte of the stored pattern in memory
    c->mov(asmjit::host::byte_ptr(*cpu, *addr, 0, OFFSET_OF(SPUThread, gpr[op.rt])), 0x03);
    c->unuse(*addr);
}
|
|
|
|
// CHD: writes the constant 0x10..0x1f pattern into gpr[rt], then overwrites the
// 16-bit slot at byte offset (~(ra._u32[3] + i7)) & 0xe with 0x0203.
//
// NOTE: a disabled fast path used to be sketched here for op.ra == 1; it
// precomputed the whole pattern at compile time
// (value.u16r[(op.i7 >> 1) & 0x7] = 0x0203) under the assumption that
// SP % 16 is always zero.
void spu_recompiler::CHD(spu_opcode_t op)
{
    const auto base_pattern = _mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // addr = even byte offset inside the destination register
    c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));

    if (op.i7)
    {
        c->add(*addr, op.i7);
    }

    c->not_(*addr);
    c->and_(*addr, 0xe);

    const XmmLink& pattern = XmmAlloc();
    c->movdqa(pattern, XmmConst(base_pattern));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), pattern);

    // patch two bytes of the stored pattern in memory
    c->mov(asmjit::host::word_ptr(*cpu, *addr, 0, OFFSET_OF(SPUThread, gpr[op.rt])), 0x0203);
    c->unuse(*addr);
}
|
|
|
|
// CWD: writes the constant 0x10..0x1f pattern into gpr[rt], then overwrites the
// 32-bit slot at byte offset (~(ra._u32[3] + i7)) & 0xc with 0x00010203.
//
// NOTE: a disabled fast path used to be sketched here for op.ra == 1; it
// precomputed the whole pattern at compile time
// (value.u32r[(op.i7 >> 2) & 0x3] = 0x00010203) under the assumption that
// SP % 16 is always zero.
void spu_recompiler::CWD(spu_opcode_t op)
{
    const auto base_pattern = _mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // addr = 4-byte aligned offset inside the destination register
    c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));

    if (op.i7)
    {
        c->add(*addr, op.i7);
    }

    c->not_(*addr);
    c->and_(*addr, 0xc);

    const XmmLink& pattern = XmmAlloc();
    c->movdqa(pattern, XmmConst(base_pattern));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), pattern);

    // patch four bytes of the stored pattern in memory
    c->mov(asmjit::host::dword_ptr(*cpu, *addr, 0, OFFSET_OF(SPUThread, gpr[op.rt])), 0x00010203);
    c->unuse(*addr);
}
|
|
|
|
// CDD: writes the constant 0x10..0x1f pattern into gpr[rt], then overwrites the
// 64-bit half at byte offset (~(ra._u32[3] + i7)) & 0x8 with 0x0001020304050607.
//
// NOTE: a disabled fast path used to be sketched here for op.ra == 1; it
// precomputed the whole pattern at compile time
// (value.u64r[(op.i7 >> 3) & 0x1] = 0x0001020304050607ull) under the assumption
// that SP % 16 is always zero.
void spu_recompiler::CDD(spu_opcode_t op)
{
    const auto base_pattern = _mm_set_epi32(0x10111213, 0x14151617, 0x18191a1b, 0x1c1d1e1f);

    // addr = 0 or 8: which half of the destination register gets patched
    c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));

    if (op.i7)
    {
        c->add(*addr, op.i7);
    }

    c->not_(*addr);
    c->and_(*addr, 0x8);

    const XmmLink& pattern = XmmAlloc();
    c->movdqa(pattern, XmmConst(base_pattern));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), pattern);

    // the 64-bit value goes through qw0 before being stored
    c->mov(*qw0, asmjit::imm_u(0x0001020304050607));
    c->mov(asmjit::host::qword_ptr(*cpu, *addr, 0, OFFSET_OF(SPUThread, gpr[op.rt])), *qw0);
    c->unuse(*addr);
    c->unuse(*qw0);
}
|
|
|
|
// ROTQBII: rotates the 128-bit value of gpr[ra] left by the immediate (i7 & 7)
// bits using two 64-bit double shifts and stores the result into gpr[rt].
void spu_recompiler::ROTQBII(spu_opcode_t op)
{
    const auto bits = op.i7 & 0x7;

    // qw0 = low half, qw1 = high half, qw2 = untouched copy of the low half
    c->mov(*qw0, SPU_OFF_64(gpr[op.ra]._u64[0]));
    c->mov(*qw1, SPU_OFF_64(gpr[op.ra]._u64[1]));
    c->mov(*qw2, *qw0);

    c->shld(*qw0, *qw1, bits);
    c->shld(*qw1, *qw2, bits);

    c->mov(SPU_OFF_64(gpr[op.rt]._u64[0]), *qw0);
    c->mov(SPU_OFF_64(gpr[op.rt]._u64[1]), *qw1);
    c->unuse(*qw0);
    c->unuse(*qw1);
    c->unuse(*qw2);
}
|
|
|
|
// ROTQMBII: shifts the 128-bit value of gpr[ra] right by the immediate
// ((0 - i7) & 7) bits, filling with zeros, and stores the result into gpr[rt].
void spu_recompiler::ROTQMBII(spu_opcode_t op)
{
    // subtraction binds tighter than '&'; the parentheses only make that explicit
    const auto bits = (0 - op.i7) & 0x7;

    // qw0 = low half, qw1 = high half
    c->mov(*qw0, SPU_OFF_64(gpr[op.ra]._u64[0]));
    c->mov(*qw1, SPU_OFF_64(gpr[op.ra]._u64[1]));

    c->shrd(*qw0, *qw1, bits);
    c->shr(*qw1, bits);

    c->mov(SPU_OFF_64(gpr[op.rt]._u64[0]), *qw0);
    c->mov(SPU_OFF_64(gpr[op.rt]._u64[1]), *qw1);
    c->unuse(*qw0);
    c->unuse(*qw1);
}
|
|
|
|
// SHLQBII: shifts the 128-bit value of gpr[ra] left by the immediate (i7 & 7)
// bits, filling with zeros, and stores the result into gpr[rt].
void spu_recompiler::SHLQBII(spu_opcode_t op)
{
    const auto bits = op.i7 & 0x7;

    // qw0 = low half, qw1 = high half
    c->mov(*qw0, SPU_OFF_64(gpr[op.ra]._u64[0]));
    c->mov(*qw1, SPU_OFF_64(gpr[op.ra]._u64[1]));

    c->shld(*qw1, *qw0, bits);
    c->shl(*qw0, bits);

    c->mov(SPU_OFF_64(gpr[op.rt]._u64[0]), *qw0);
    c->mov(SPU_OFF_64(gpr[op.rt]._u64[1]), *qw1);
    c->unuse(*qw0);
    c->unuse(*qw1);
}
|
|
|
|
// ROTQBYI: rotates gpr[ra] by the immediate byte count (i7 & 0xf) with a
// self-palignr of 16 - count and stores the result into gpr[rt].
void spu_recompiler::ROTQBYI(spu_opcode_t op)
{
    const int count = op.i7 & 0xf;

    const XmmLink& value = XmmGet(op.ra, XmmType::Int);
    c->palignr(value, value, 16 - count);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// ROTQMBYI: applies psrldq to gpr[ra] with the immediate byte count
// ((0 - i7) & 0x1f) and stores the result into gpr[rt].
void spu_recompiler::ROTQMBYI(spu_opcode_t op)
{
    // subtraction binds tighter than '&'; the parentheses only make that explicit
    const int count = (0 - op.i7) & 0x1f;

    const XmmLink& value = XmmGet(op.ra, XmmType::Int);
    c->psrldq(value, count);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// SHLQBYI: applies pslldq to gpr[ra] with the immediate byte count (i7 & 0x1f)
// and stores the result into gpr[rt].
void spu_recompiler::SHLQBYI(spu_opcode_t op)
{
    const int count = op.i7 & 0x1f;

    const XmmLink& value = XmmGet(op.ra, XmmType::Int);
    c->pslldq(value, count);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// NOP: emits no code.
void spu_recompiler::NOP(spu_opcode_t op)
{
    // intentionally empty
}
|
|
|
|
// CGT: per 32-bit lane signed compare, rt = (ra > rb) mask via pcmpgtd.
void spu_recompiler::CGT(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    c->pcmpgtd(lhs, SPU_OFF_128(gpr[op.rb]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// XOR: rt = ra ^ rb over the full 128 bits.
void spu_recompiler::XOR(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    c->pxor(lhs, SPU_OFF_128(gpr[op.rb]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// CGTH: per 16-bit lane signed compare, rt = (ra > rb) mask via pcmpgtw.
void spu_recompiler::CGTH(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    c->pcmpgtw(lhs, SPU_OFF_128(gpr[op.rb]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// EQV: rt = (rb ^ all-ones) ^ ra, i.e. the bitwise complement of ra ^ rb.
void spu_recompiler::EQV(spu_opcode_t op)
{
    const auto all_ones = _mm_set1_epi32(0xffffffff);

    const XmmLink& acc = XmmGet(op.rb, XmmType::Int);
    c->pxor(acc, XmmConst(all_ones));
    c->pxor(acc, SPU_OFF_128(gpr[op.ra]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// CGTB: per byte signed compare, rt = (ra > rb) mask via pcmpgtb.
void spu_recompiler::CGTB(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    c->pcmpgtb(lhs, SPU_OFF_128(gpr[op.rb]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// SUMB: sums the four bytes of every 32-bit lane of ra and of rb; each result
// lane holds the ra sum in its low 16 bits and the rb sum in its high 16 bits.
void spu_recompiler::SUMB(spu_opcode_t op)
{
    // interleaves the 16-bit sums: words 0..3 come from ra, words 4..7 from rb
    const auto interleave = _mm_set_epi8(15, 14, 7, 6, 13, 12, 5, 4, 11, 10, 3, 2, 9, 8, 1, 0);

    const XmmLink& sums_a = XmmGet(op.ra, XmmType::Int);
    const XmmLink& sums_b = XmmGet(op.rb, XmmType::Int);
    const XmmLink& ones = XmmAlloc();

    // multiply every byte by 1 and add adjacent pairs -> eight 16-bit pair sums each
    c->movdqa(ones, XmmConst(_mm_set1_epi8(1)));
    c->pmaddubsw(sums_a, ones);
    c->pmaddubsw(sums_b, ones);

    // add adjacent pair sums -> four 16-bit sums per source, packed a then b
    c->phaddw(sums_a, sums_b);
    c->pshufb(sums_a, XmmConst(interleave));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), sums_a);
}
|
|
|
|
//HGT uses signed values. HLGT uses unsigned values
|
|
void spu_recompiler::HGT(spu_opcode_t op)
|
|
{
|
|
c->mov(*addr, SPU_OFF_32(gpr[op.ra]._s32[3]));
|
|
c->cmp(*addr, SPU_OFF_32(gpr[op.rb]._s32[3]));
|
|
|
|
c->mov(*addr, m_pos | 0x1000000);
|
|
c->jg(*end);
|
|
c->unuse(*addr);
|
|
}
|
|
|
|
// CLZ: emits a call to a host helper that writes cntlz32() of each of the four
// 32-bit lanes of gpr[ra] into the matching lane of gpr[rt].
void spu_recompiler::CLZ(spu_opcode_t op)
{
    // host-side helper: t = destination lanes, a = source lanes
    auto body = [](u32* t, const u32* a) noexcept
    {
        for (u32 i = 0; i < 4; i++)
        {
            t[i] = cntlz32(a[i]);
        }
    };

    // pass pointers to the destination and source registers
    c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
    c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
    asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u32*, const u32*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder2<void, void*, void*>());
    call->setArg(0, *qw0);
    call->setArg(1, *qw1);

    // release the scratch pointers, matching the other emitters that use qw0/qw1
    c->unuse(*qw0);
    c->unuse(*qw1);

    // Disabled inline alternative, kept for reference:
    //c->mov(*qw0, 32 + 31);
    //for (u32 i = 0; i < 4; i++) // unrolled loop
    //{
    //    c->bsr(*addr, SPU_OFF_32(gpr[op.ra]._u32[i]));
    //    c->cmovz(*addr, qw0->r32());
    //    c->xor_(*addr, 31);
    //    c->mov(SPU_OFF_32(gpr[op.rt]._u32[i]), *addr);
    //}
}
|
|
|
|
// XSWD: sign-extends ra._s32[0] and ra._s32[2] to 64 bits and stores them into
// rt._s64[0] and rt._s64[1] respectively.
void spu_recompiler::XSWD(spu_opcode_t op)
{
    // both loads are emitted before either store
    c->movsxd(*qw0, SPU_OFF_32(gpr[op.ra]._s32[0]));
    c->movsxd(*qw1, SPU_OFF_32(gpr[op.ra]._s32[2]));

    c->mov(SPU_OFF_64(gpr[op.rt]._s64[0]), *qw0);
    c->mov(SPU_OFF_64(gpr[op.rt]._s64[1]), *qw1);

    c->unuse(*qw0);
    c->unuse(*qw1);
}
|
|
|
|
// XSHW: sign-extends the low 16 bits of every 32-bit lane of ra
// (shift left by 16, then arithmetic shift right by 16) into rt.
void spu_recompiler::XSHW(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Int);
    c->pslld(value, 16);
    c->psrad(value, 16);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// CNTB: per byte population count of ra, computed as two 4-bit table lookups
// (low and high nibble) via pshufb and summed with paddb; result goes to rt.
void spu_recompiler::CNTB(spu_opcode_t op)
{
    // table[i] = number of set bits in the 4-bit value i
    const auto nibble_bits = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0);

    const XmmLink& low = XmmGet(op.ra, XmmType::Int);
    const XmmLink& high = XmmAlloc();
    const XmmLink& tmp = XmmAlloc();

    // split every byte into its low nibble (low) and high nibble (high)
    c->movdqa(high, low);
    c->psrlq(high, 4);
    c->movdqa(tmp, XmmConst(_mm_set1_epi8(0xf)));
    c->pand(low, tmp);
    c->pand(high, tmp);

    // tmp = table[low nibble]
    c->movdqa(tmp, XmmConst(nibble_bits));
    c->pshufb(tmp, low);

    // low = table[high nibble]
    c->movdqa(low, XmmConst(nibble_bits));
    c->pshufb(low, high);

    c->paddb(low, tmp);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), low);
}
|
|
|
|
// XSBH: sign-extends the low 8 bits of every 16-bit lane of ra
// (shift left by 8, then arithmetic shift right by 8) into rt.
void spu_recompiler::XSBH(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Int);
    c->psllw(value, 8);
    c->psraw(value, 8);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// CLGT: per 32-bit lane unsigned compare, rt = (ra > rb) mask. Both operands
// have their sign bit flipped so the signed pcmpgtd yields the unsigned order.
void spu_recompiler::CLGT(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmAlloc();

    // rhs starts as the sign-bit mask, then becomes rb with flipped sign bits
    c->movdqa(rhs, XmmConst(_mm_set1_epi32(0x80000000)));
    c->pxor(lhs, rhs);
    c->pxor(rhs, SPU_OFF_128(gpr[op.rb]));

    c->pcmpgtd(lhs, rhs);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// ANDC: rt = ra & ~rb (pandn complements its first operand, here rb).
void spu_recompiler::ANDC(spu_opcode_t op)
{
    const XmmLink& inverted = XmmGet(op.rb, XmmType::Int);
    c->pandn(inverted, SPU_OFF_128(gpr[op.ra]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), inverted);
}
|
|
|
|
// FCGT: per float lane rt = (ra > rb) mask, emitted as the swapped
// less-than compare rb < ra (cmpps predicate 1).
void spu_recompiler::FCGT(spu_opcode_t op)
{
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Float);
    c->cmpps(rhs, SPU_OFF_128(gpr[op.ra]), 1);
    c->movaps(SPU_OFF_128(gpr[op.rt]), rhs);
}
|
|
|
|
// DFCGT: not supported by this recompiler; always throws at compile time.
void spu_recompiler::DFCGT(spu_opcode_t op)
{
    throw EXCEPTION("Unexpected instruction");
}
|
|
|
|
// FA: per float lane rt = ra + rb.
void spu_recompiler::FA(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.ra, XmmType::Float);
    c->addps(acc, SPU_OFF_128(gpr[op.rb]));
    c->movaps(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// FS: per float lane rt = ra - rb.
void spu_recompiler::FS(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.ra, XmmType::Float);
    c->subps(acc, SPU_OFF_128(gpr[op.rb]));
    c->movaps(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// FM: per float lane rt = ra * rb.
void spu_recompiler::FM(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.ra, XmmType::Float);
    c->mulps(acc, SPU_OFF_128(gpr[op.rb]));
    c->movaps(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// CLGTH: per 16-bit lane unsigned compare, rt = (ra > rb) mask. Both operands
// have their sign bit flipped so the signed pcmpgtw yields the unsigned order.
void spu_recompiler::CLGTH(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmAlloc();

    // rhs starts as the sign-bit mask, then becomes rb with flipped sign bits
    c->movdqa(rhs, XmmConst(_mm_set1_epi16(INT16_MIN)));
    c->pxor(lhs, rhs);
    c->pxor(rhs, SPU_OFF_128(gpr[op.rb]));

    c->pcmpgtw(lhs, rhs);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// ORC: rt = ra | ~rb (rb is complemented by XOR with all-ones first).
void spu_recompiler::ORC(spu_opcode_t op)
{
    const auto all_ones = _mm_set1_epi32(0xffffffff);

    const XmmLink& acc = XmmGet(op.rb, XmmType::Int);
    c->pxor(acc, XmmConst(all_ones));
    c->por(acc, SPU_OFF_128(gpr[op.ra]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// FCMGT: per float lane rt = (|ra| > |rb|) mask, emitted as the swapped
// less-than compare |rb| < |ra| (cmpps predicate 1).
void spu_recompiler::FCMGT(spu_opcode_t op)
{
    const XmmLink& abs_b = XmmGet(op.rb, XmmType::Float);
    const XmmLink& abs_a = XmmAlloc();

    // abs_a starts as the 0x7fffffff mask that clears the sign bit
    c->movaps(abs_a, XmmConst(_mm_set1_epi32(0x7fffffff)));
    c->andps(abs_b, abs_a);
    c->andps(abs_a, SPU_OFF_128(gpr[op.ra]));

    c->cmpps(abs_b, abs_a, 1);
    c->movaps(SPU_OFF_128(gpr[op.rt]), abs_b);
}
|
|
|
|
// DFCMGT: not supported by this recompiler; always throws at compile time.
void spu_recompiler::DFCMGT(spu_opcode_t op)
{
    throw EXCEPTION("Unexpected instruction");
}
|
|
|
|
// DFA: per double lane rt = ra + rb.
void spu_recompiler::DFA(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.ra, XmmType::Double);
    c->addpd(acc, SPU_OFF_128(gpr[op.rb]));
    c->movapd(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// DFS: per double lane rt = ra - rb.
void spu_recompiler::DFS(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.ra, XmmType::Double);
    c->subpd(acc, SPU_OFF_128(gpr[op.rb]));
    c->movapd(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// DFM: per double lane rt = ra * rb.
void spu_recompiler::DFM(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.ra, XmmType::Double);
    c->mulpd(acc, SPU_OFF_128(gpr[op.rb]));
    c->movapd(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// CLGTB: per byte unsigned compare, rt = (ra > rb) mask. Both operands have
// their sign bit flipped so the signed pcmpgtb yields the unsigned order.
void spu_recompiler::CLGTB(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmAlloc();

    // rhs starts as the sign-bit mask, then becomes rb with flipped sign bits
    c->movdqa(rhs, XmmConst(_mm_set1_epi8(INT8_MIN)));
    c->pxor(lhs, rhs);
    c->pxor(rhs, SPU_OFF_128(gpr[op.rb]));

    c->pcmpgtb(lhs, rhs);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// HLGT: compares ra._u32[3] with rb._u32[3] as unsigned values and, when ra is
// above rb, jumps to *end with *addr set to m_pos | 0x1000000.
void spu_recompiler::HLGT(spu_opcode_t op)
{
    c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));
    c->cmp(*addr, SPU_OFF_32(gpr[op.rb]._u32[3]));

    // value handed to the exit path; emitted between the cmp and the jump
    const auto exit_value = m_pos | 0x1000000;
    c->mov(*addr, exit_value);
    c->ja(*end);
    c->unuse(*addr);
}
|
|
|
|
// DFMA: per double lane rt = rt + ra * rb.
void spu_recompiler::DFMA(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.rt, XmmType::Double);
    const XmmLink& product = XmmGet(op.ra, XmmType::Double);

    c->mulpd(product, SPU_OFF_128(gpr[op.rb]));
    c->addpd(acc, product);
    c->movapd(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// DFMS: per double lane rt = ra * rb - rt.
void spu_recompiler::DFMS(spu_opcode_t op)
{
    const XmmLink& product = XmmGet(op.ra, XmmType::Double);
    const XmmLink& subtrahend = XmmGet(op.rt, XmmType::Double);

    c->mulpd(product, SPU_OFF_128(gpr[op.rb]));
    c->subpd(product, subtrahend);
    c->movapd(SPU_OFF_128(gpr[op.rt]), product);
}
|
|
|
|
// DFNMS: per double lane rt = rt - ra * rb.
void spu_recompiler::DFNMS(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.rt, XmmType::Double);
    const XmmLink& product = XmmGet(op.ra, XmmType::Double);

    c->mulpd(product, SPU_OFF_128(gpr[op.rb]));
    c->subpd(acc, product);
    c->movapd(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// DFNMA: per double lane rt = 0 - (ra * rb + rt).
void spu_recompiler::DFNMA(spu_opcode_t op)
{
    const XmmLink& work = XmmGet(op.ra, XmmType::Double);
    const XmmLink& sum = XmmGet(op.rt, XmmType::Double);

    // sum = rt + ra * rb
    c->mulpd(work, SPU_OFF_128(gpr[op.rb]));
    c->addpd(sum, work);

    // work is recycled as zero so the result is 0 - sum
    c->xorpd(work, work);
    c->subpd(work, sum);
    c->movapd(SPU_OFF_128(gpr[op.rt]), work);
}
|
|
|
|
// CEQ: per 32-bit lane equality compare, rt = (ra == rb) mask via pcmpeqd.
void spu_recompiler::CEQ(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    c->pcmpeqd(lhs, SPU_OFF_128(gpr[op.rb]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// MPYHHU: per 32-bit lane, unsigned 32-bit product of the upper 16-bit halves
// of ra and rb, assembled from pmulhuw (high part) and pmullw (low part).
void spu_recompiler::MPYHHU(spu_opcode_t op)
{
    const auto upper_half_mask = _mm_set1_epi32(0xffff0000);

    const XmmLink& hi_part = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);
    const XmmLink& lo_part = XmmAlloc();

    c->movdqa(lo_part, hi_part);
    c->pmulhuw(hi_part, rhs);
    c->pmullw(lo_part, rhs);

    // keep the product's high 16 bits in place and move its low 16 bits down
    c->pand(hi_part, XmmConst(upper_half_mask));
    c->psrld(lo_part, 16);
    c->por(hi_part, lo_part);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), hi_part);
}
|
|
|
|
// ADDX: per 32-bit lane rt = (rt & 1) + ra + rb.
void spu_recompiler::ADDX(spu_opcode_t op)
{
    const XmmLink& sum = XmmGet(op.rt, XmmType::Int);

    // only bit 0 of the old rt takes part in the sum
    c->pand(sum, XmmConst(_mm_set1_epi32(1)));
    c->paddd(sum, SPU_OFF_128(gpr[op.ra]));
    c->paddd(sum, SPU_OFF_128(gpr[op.rb]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), sum);
}
|
|
|
|
// SFX: per 32-bit lane rt = rb - ra - (~rt & 1).
void spu_recompiler::SFX(spu_opcode_t op)
{
    const XmmLink& borrow = XmmGet(op.rt, XmmType::Int);
    const XmmLink& diff = XmmGet(op.rb, XmmType::Int);

    // borrow = 1 where bit 0 of the old rt is clear, else 0
    c->pandn(borrow, XmmConst(_mm_set1_epi32(1)));
    c->psubd(diff, SPU_OFF_128(gpr[op.ra]));
    c->psubd(diff, borrow);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), diff);
}
|
|
|
|
// CGX: emits a call to a host helper that replaces every 32-bit lane of
// gpr[rt] with the carry out of (rt & 1) + ra + rb for that lane.
void spu_recompiler::CGX(spu_opcode_t op) //nf
{
    // host-side helper: t = destination/carry-in lanes, a and b = addends
    auto body = [](u32* t, const u32* a, const u32* b) noexcept
    {
        for (s32 i = 0; i < 4; i++)
        {
            // the sum is formed in 64 bits so bit 32 is the carry
            t[i] = (static_cast<u64>(t[i] & 1) + a[i] + b[i]) >> 32;
        }
    };

    // pass pointers to the three registers
    c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
    c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
    c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
    asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u32*, const u32*, const u32*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
    call->setArg(0, *qw0);
    call->setArg(1, *qw1);
    call->setArg(2, *qw2);

    // release the scratch pointers, matching the other emitters that use qw0/qw1/qw2
    c->unuse(*qw0);
    c->unuse(*qw1);
    c->unuse(*qw2);
}
|
|
|
|
// BGX: emits a call to a host helper that replaces every 32-bit lane of
// gpr[rt] with 1 when rb - ra - (1 - (rt & 1)) does not go negative, else 0.
void spu_recompiler::BGX(spu_opcode_t op) //nf
{
    // host-side helper: t = destination/borrow-in lanes, a = subtrahend, b = minuend
    auto body = [](u32* t, const u32* a, const u32* b) noexcept
    {
        for (s32 i = 0; i < 4; i++)
        {
            // the difference is formed in 64 bits and tested as a signed value
            const s64 result = static_cast<u64>(b[i]) - static_cast<u64>(a[i]) - static_cast<u64>(1 - (t[i] & 1));
            t[i] = result >= 0;
        }
    };

    // pass pointers to the three registers
    c->lea(*qw0, SPU_OFF_128(gpr[op.rt]));
    c->lea(*qw1, SPU_OFF_128(gpr[op.ra]));
    c->lea(*qw2, SPU_OFF_128(gpr[op.rb]));
    asmjit::X86CallNode* call = c->call(asmjit::imm_ptr(asmjit_cast<void*, void(u32*, const u32*, const u32*)>(body)), asmjit::kFuncConvHost, asmjit::FuncBuilder3<void, void*, void*, void*>());
    call->setArg(0, *qw0);
    call->setArg(1, *qw1);
    call->setArg(2, *qw2);

    // release the scratch pointers, matching the other emitters that use qw0/qw1/qw2
    c->unuse(*qw0);
    c->unuse(*qw1);
    c->unuse(*qw2);
}
|
|
|
|
// MPYHHA: per 32-bit lane rt += product of the upper 16-bit halves of ra and
// rb (halves are moved down with psrld, then multiplied with pmaddwd).
void spu_recompiler::MPYHHA(spu_opcode_t op)
{
    const XmmLink& acc = XmmGet(op.rt, XmmType::Int);
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);

    // after the shifts the upper word of every lane is zero, so pmaddwd
    // yields just the product of the two lower words
    c->psrld(lhs, 16);
    c->psrld(rhs, 16);
    c->pmaddwd(lhs, rhs);

    c->paddd(acc, lhs);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// MPYHHAU: per 32-bit lane rt += unsigned product of the upper 16-bit halves
// of ra and rb, assembled from pmulhuw (high part) and pmullw (low part).
void spu_recompiler::MPYHHAU(spu_opcode_t op)
{
    const auto upper_half_mask = _mm_set1_epi32(0xffff0000);

    const XmmLink& acc = XmmGet(op.rt, XmmType::Int);
    const XmmLink& hi_part = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);
    const XmmLink& lo_part = XmmAlloc();

    c->movdqa(lo_part, hi_part);
    c->pmulhuw(hi_part, rhs);
    c->pmullw(lo_part, rhs);

    // keep the product's high 16 bits in place and move its low 16 bits down
    c->pand(hi_part, XmmConst(upper_half_mask));
    c->psrld(lo_part, 16);

    // the two parts occupy disjoint bits, so they are added one after the other
    c->paddd(acc, hi_part);
    c->paddd(acc, lo_part);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), acc);
}
|
|
|
|
// FSCRRD: stores all-zero bits into gpr[rt] (placeholder implementation, marked
// as a hack by the original author).
void spu_recompiler::FSCRRD(spu_opcode_t op)
{
    const XmmLink& zero = XmmAlloc();
    c->pxor(zero, zero);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), zero);
}
|
|
|
|
// FESD: converts float lanes 1 and 3 of ra to doubles and stores them as the
// two double lanes of rt.
void spu_recompiler::FESD(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Float);

    // 0x8d: _f[0] = _f[1]; _f[1] = _f[3] (the low two lanes feed cvtps2pd)
    c->shufps(value, value, 0x8d);
    c->cvtps2pd(value, value);
    c->movapd(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// FRDS: converts the two double lanes of ra to floats, places them in float
// lanes 1 and 3 of rt and zeroes lanes 0 and 2.
void spu_recompiler::FRDS(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Double);

    c->cvtpd2ps(value, value);

    // 0x72: _f[1] = _f[0]; _f[3] = _f[1]; _f[0] = _f[2] = 0
    c->shufps(value, value, 0x72);
    c->movaps(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// FSCRWR: not implemented; emits no code.
void spu_recompiler::FSCRWR(spu_opcode_t op)
{
    // intentionally empty (treated as a no-op)
}
|
|
|
|
// DFTSV: not supported by this recompiler; always throws at compile time.
void spu_recompiler::DFTSV(spu_opcode_t op)
{
    throw EXCEPTION("Unexpected instruction");
}
|
|
|
|
// FCEQ: per float lane rt = (rb == ra) mask (cmpps predicate 0).
void spu_recompiler::FCEQ(spu_opcode_t op)
{
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Float);
    c->cmpps(rhs, SPU_OFF_128(gpr[op.ra]), 0);
    c->movaps(SPU_OFF_128(gpr[op.rt]), rhs);
}
|
|
|
|
// DFCEQ: not supported by this recompiler; always throws at compile time.
void spu_recompiler::DFCEQ(spu_opcode_t op)
{
    throw EXCEPTION("Unexpected instruction");
}
|
|
|
|
// MPY: per 32-bit lane, product of the lower 16-bit halves of ra and rb
// (upper halves are masked off, then pmaddwd multiplies the lower words).
void spu_recompiler::MPY(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);
    const XmmLink& low_mask = XmmAlloc();

    // keep only the low 16 bits of every lane in both operands
    c->movdqa(low_mask, XmmConst(_mm_set1_epi32(0xffff)));
    c->pand(lhs, low_mask);
    c->pand(rhs, low_mask);

    c->pmaddwd(lhs, rhs);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// MPYH: per 32-bit lane rt = (low 16 bits of (upper half of ra * lower half
// of rb)) << 16.
void spu_recompiler::MPYH(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);

    // move ra's upper halves down so they line up with rb's lower halves
    c->psrld(lhs, 16);
    c->pmullw(lhs, rhs);

    // place the 16-bit product in the upper half of every lane
    c->pslld(lhs, 16);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// MPYHH: per 32-bit lane, product of the upper 16-bit halves of ra and rb
// (halves are moved down with psrld, then multiplied with pmaddwd).
void spu_recompiler::MPYHH(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);

    c->psrld(lhs, 16);
    c->psrld(rhs, 16);
    c->pmaddwd(lhs, rhs);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// MPYS: per 32-bit lane, the high 16 bits of the signed product of the lower
// halves of ra and rb, sign-extended to 32 bits.
void spu_recompiler::MPYS(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);

    c->pmulhw(lhs, rhs);

    // sign-extend the lower word of every lane (discarding the upper word)
    c->pslld(lhs, 16);
    c->psrad(lhs, 16);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// CEQH: per 16-bit lane equality compare, rt = (ra == rb) mask via pcmpeqw.
void spu_recompiler::CEQH(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    c->pcmpeqw(lhs, SPU_OFF_128(gpr[op.rb]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// FCMEQ: per float lane rt = (|rb| == |ra|) mask (cmpps predicate 0).
void spu_recompiler::FCMEQ(spu_opcode_t op)
{
    const XmmLink& abs_b = XmmGet(op.rb, XmmType::Float);
    const XmmLink& abs_a = XmmAlloc();

    // abs_a starts as the 0x7fffffff mask that clears the sign bit
    c->movaps(abs_a, XmmConst(_mm_set1_epi32(0x7fffffff)));
    c->andps(abs_b, abs_a);
    c->andps(abs_a, SPU_OFF_128(gpr[op.ra]));

    c->cmpps(abs_b, abs_a, 0);
    c->movaps(SPU_OFF_128(gpr[op.rt]), abs_b);
}
|
|
|
|
// DFCMEQ: not supported by this recompiler; always throws at compile time.
void spu_recompiler::DFCMEQ(spu_opcode_t op)
{
    throw EXCEPTION("Unexpected instruction");
}
|
|
|
|
// MPYU: per 32-bit lane, unsigned 32-bit product of the lower 16-bit halves of
// ra and rb, assembled from pmulhuw (high part) and pmullw (low part).
void spu_recompiler::MPYU(spu_opcode_t op)
{
    const auto lower_half_mask = _mm_set1_epi32(0xffff);

    const XmmLink& hi_part = XmmGet(op.ra, XmmType::Int);
    const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);
    const XmmLink& lo_part = XmmAlloc();

    c->movdqa(lo_part, hi_part);
    c->pmulhuw(hi_part, rhs);
    c->pmullw(lo_part, rhs);

    // move the product's high 16 bits up and keep its low 16 bits in place
    c->pslld(hi_part, 16);
    c->pand(lo_part, XmmConst(lower_half_mask));
    c->por(hi_part, lo_part);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), hi_part);
}
|
|
|
|
// CEQB: per byte equality compare, rt = (ra == rb) mask via pcmpeqb.
void spu_recompiler::CEQB(spu_opcode_t op)
{
    const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
    c->pcmpeqb(lhs, SPU_OFF_128(gpr[op.rb]));
    c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
// FI (Floating Interpolate): this implementation simply copies rb into rt;
// ra is not read.
void spu_recompiler::FI(spu_opcode_t op)
{
    const XmmLink& estimate = XmmGet(op.rb, XmmType::Float);
    c->movaps(SPU_OFF_128(gpr[op.rt]), estimate);
}
|
|
|
|
// HEQ: compares ra._s32[3] with rb._s32[3] and, when they are equal, jumps to
// *end with *addr set to m_pos | 0x1000000.
void spu_recompiler::HEQ(spu_opcode_t op)
{
    c->mov(*addr, SPU_OFF_32(gpr[op.ra]._s32[3]));
    c->cmp(*addr, SPU_OFF_32(gpr[op.rb]._s32[3]));

    // value handed to the exit path; emitted between the cmp and the jump
    const auto exit_value = m_pos | 0x1000000;
    c->mov(*addr, exit_value);
    c->je(*end);
    c->unuse(*addr);
}
|
|
|
|
// CFLTS: scales every float lane of ra by 2^(173 - i8), converts it to a
// signed 32-bit integer with truncation and saturates positive overflow to
// 0x7fffffff; the result goes to rt.
void spu_recompiler::CFLTS(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Float);
    const XmmLink& overflow = XmmAlloc();

    if (op.i8 != 173)
    {
        // scale
        const float factor = exp2f(static_cast<s16>(173 - op.i8));
        c->mulps(value, XmmConst(_mm_set1_ps(factor)));
    }

    // overflow = all-ones mask where 2^31 <= value (cmpps predicate 2)
    c->movaps(overflow, XmmConst(_mm_set1_ps(exp2f(31))));
    c->cmpps(overflow, value, 2);

    // convert to ints with truncation
    c->cvttps2dq(value, value);

    // fix result saturation (0x80000000 -> 0x7fffffff)
    c->pxor(value, overflow);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// CFLTU: scales every float lane of ra by 2^(173 - i8), clamps negatives to 0
// and converts to an unsigned 32-bit integer; the result goes to rt. Values at
// or above 2^31 are handled by converting (value - 2^31) separately and OR-ing
// it in; values at or above 2^32 produce all-ones.
void spu_recompiler::CFLTU(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Float);
    const XmmLink& scaled = XmmAlloc();
    const XmmLink& upper = XmmAlloc();
    const XmmLink& limit = XmmAlloc();

    if (op.i8 != 173)
    {
        // scale
        const float factor = exp2f(static_cast<s16>(173 - op.i8));
        c->mulps(value, XmmConst(_mm_set1_ps(factor)));
    }

    // saturate negative inputs to zero
    c->maxps(value, XmmConst(_mm_set1_ps(0.0f)));

    // keep two copies of the scaled value
    c->movaps(scaled, value);
    c->movaps(upper, value);

    // upper = value - 2^31, kept only where 2^31 <= value
    c->movaps(limit, XmmConst(_mm_set1_ps(exp2f(31))));
    c->subps(upper, limit);
    c->cmpps(limit, scaled, 2);
    c->andps(upper, limit);

    c->cvttps2dq(value, value);

    // scaled = all-ones mask where value is not less than 2^32 (predicate 5)
    c->cmpps(scaled, XmmConst(_mm_set1_ps(exp2f(32))), 5);
    c->cvttps2dq(upper, upper);

    // merge the direct conversion, the overflow mask and the upper-range part
    c->por(value, scaled);
    c->por(value, upper);
    c->movdqa(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// CSFLT: converts every signed 32-bit lane of ra to float and scales it by
// 2^(i8 - 155); the result goes to rt.
void spu_recompiler::CSFLT(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Int);

    // convert to floats
    c->cvtdq2ps(value, value);

    if (op.i8 != 155)
    {
        // scale
        const float factor = exp2f(static_cast<s16>(op.i8 - 155));
        c->mulps(value, XmmConst(_mm_set1_ps(factor)));
    }

    c->movaps(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// CUFLT: converts every unsigned 32-bit lane of ra to float and scales it by
// 2^(i8 - 155); the result goes to rt. The top bit is converted separately:
// the low 31 bits go through cvtdq2ps and 2^31 is added where bit 31 was set.
void spu_recompiler::CUFLT(spu_opcode_t op)
{
    const XmmLink& value = XmmGet(op.ra, XmmType::Int);
    const XmmLink& correction = XmmAlloc();

    c->movdqa(correction, value);

    // convert the low 31 bits to floats
    c->pand(value, XmmConst(_mm_set1_epi32(0x7fffffff)));
    c->cvtdq2ps(value, value);

    // generate mask from sign bit, then turn it into 2^31 or 0
    c->psrad(correction, 31);
    c->andps(correction, XmmConst(_mm_set1_ps(exp2f(31))));

    // add correction component
    c->addps(value, correction);

    if (op.i8 != 155)
    {
        // scale
        const float factor = exp2f(static_cast<s16>(op.i8 - 155));
        c->mulps(value, XmmConst(_mm_set1_ps(factor)));
    }

    c->movaps(SPU_OFF_128(gpr[op.rt]), value);
}
|
|
|
|
// BRZ: branches to the m_pos-relative target when rt._u32[3] == 0. Jumps
// straight to the target's label when it is initialized; otherwise leaves
// through *end with the target in *addr.
void spu_recompiler::BRZ(spu_opcode_t op)
{
    const u32 target = spu_branch_target(m_pos, op.i16);

    if (target == m_pos)
    {
        throw EXCEPTION("Branch-to-self (0x%05x)", target);
    }

    // sets the flags consumed by the conditional jump below
    c->cmp(SPU_OFF_32(gpr[op.rt]._u32[3]), 0);

    if (labels[target / 4].isInitialized())
    {
        c->je(labels[target / 4]);
        return;
    }

    // a target inside this function is expected to have a label already
    const bool in_function = target >= m_func->addr && target < m_func->addr + m_func->size;

    if (in_function)
    {
        LOG_ERROR(SPU, "Local block not registered (brz 0x%x)", target);
    }

    c->mov(*addr, target);
    c->je(*end);
    c->unuse(*addr);
}
|
|
|
|
// STQA: byte-reverses gpr[rt] and stores the 16 bytes to *ls at
// spu_ls_target(0, i16).
void spu_recompiler::STQA(spu_opcode_t op)
{
    // pshufb control: destination byte i takes source byte 15 - i
    const auto reverse_bytes = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);

    const XmmLink& value = XmmGet(op.rt, XmmType::Int);
    c->pshufb(value, XmmConst(reverse_bytes));
    c->movdqa(asmjit::host::oword_ptr(*ls, spu_ls_target(0, op.i16)), value);
}
|
|
|
|
// BRNZ: branches to the m_pos-relative target when rt._u32[3] != 0. Jumps
// straight to the target's label when it is initialized; otherwise leaves
// through *end with the target in *addr.
void spu_recompiler::BRNZ(spu_opcode_t op)
{
    const u32 target = spu_branch_target(m_pos, op.i16);

    if (target == m_pos)
    {
        throw EXCEPTION("Branch-to-self (0x%05x)", target);
    }

    // sets the flags consumed by the conditional jump below
    c->cmp(SPU_OFF_32(gpr[op.rt]._u32[3]), 0);

    if (labels[target / 4].isInitialized())
    {
        c->jne(labels[target / 4]);
        return;
    }

    // a target inside this function is expected to have a label already
    const bool in_function = target >= m_func->addr && target < m_func->addr + m_func->size;

    if (in_function)
    {
        LOG_ERROR(SPU, "Local block not registered (brnz 0x%x)", target);
    }

    c->mov(*addr, target);
    c->jne(*end);
    c->unuse(*addr);
}
|
|
|
|
// BRHZ: branches to the m_pos-relative target when rt._u16[6] == 0. Jumps
// straight to the target's label when it is initialized; otherwise leaves
// through *end with the target in *addr.
void spu_recompiler::BRHZ(spu_opcode_t op)
{
    const u32 target = spu_branch_target(m_pos, op.i16);

    if (target == m_pos)
    {
        throw EXCEPTION("Branch-to-self (0x%05x)", target);
    }

    // sets the flags consumed by the conditional jump below
    c->cmp(SPU_OFF_16(gpr[op.rt]._u16[6]), 0);

    if (labels[target / 4].isInitialized())
    {
        c->je(labels[target / 4]);
        return;
    }

    // a target inside this function is expected to have a label already
    const bool in_function = target >= m_func->addr && target < m_func->addr + m_func->size;

    if (in_function)
    {
        LOG_ERROR(SPU, "Local block not registered (brhz 0x%x)", target);
    }

    c->mov(*addr, target);
    c->je(*end);
    c->unuse(*addr);
}
|
|
|
|
// BRHNZ: branches to the m_pos-relative target when rt._u16[6] != 0. Jumps
// straight to the target's label when it is initialized; otherwise leaves
// through *end with the target in *addr.
void spu_recompiler::BRHNZ(spu_opcode_t op)
{
    const u32 target = spu_branch_target(m_pos, op.i16);

    if (target == m_pos)
    {
        throw EXCEPTION("Branch-to-self (0x%05x)", target);
    }

    // sets the flags consumed by the conditional jump below
    c->cmp(SPU_OFF_16(gpr[op.rt]._u16[6]), 0);

    if (labels[target / 4].isInitialized())
    {
        c->jne(labels[target / 4]);
        return;
    }

    // a target inside this function is expected to have a label already
    const bool in_function = target >= m_func->addr && target < m_func->addr + m_func->size;

    if (in_function)
    {
        LOG_ERROR(SPU, "Local block not registered (brhnz 0x%x)", target);
    }

    c->mov(*addr, target);
    c->jne(*end);
    c->unuse(*addr);
}
|
|
|
|
void spu_recompiler::STQR(spu_opcode_t op)
{
	// Shuffle control that reverses the order of all 16 bytes before the store.
	const __m128i reverse_bytes = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);

	const XmmLink& value = XmmGet(op.rt, XmmType::Int);
	c->pshufb(value, XmmConst(reverse_bytes));

	// Store to the ls-relative location computed from m_pos and the 16-bit immediate.
	const auto ls_slot = asmjit::host::oword_ptr(*ls, spu_ls_target(m_pos, op.i16));
	c->movdqa(ls_slot, value);
}
|
|
|
|
void spu_recompiler::BRA(spu_opcode_t op)
{
	// Destination is computed from base 0 and the 16-bit immediate (not from m_pos).
	const u32 dest = spu_branch_target(0, op.i16);

	if (dest == m_pos)
	{
		throw EXCEPTION("Branch-to-self (0x%05x)", dest);
	}

	const u32 slot = dest / 4;

	if (labels[slot].isInitialized())
	{
		// A label exists for the destination: jump to it directly.
		c->jmp(labels[slot]);
		return;
	}

	const bool inside_function = dest >= m_func->addr && dest < m_func->addr + m_func->size;

	if (inside_function)
	{
		LOG_ERROR(SPU, "Local block not registered (bra 0x%x)", dest);
	}

	// No label: place the destination in addr and leave through the end label.
	c->mov(*addr, dest);
	c->jmp(*end);
	c->unuse(*addr);
}
|
|
|
|
void spu_recompiler::LQA(spu_opcode_t op)
{
	// Shuffle control that reverses the order of all 16 bytes after the load.
	const __m128i reverse_bytes = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);

	const XmmLink& loaded = XmmAlloc();

	// Load from the ls-relative location computed from base 0 and the 16-bit immediate.
	const auto ls_slot = asmjit::host::oword_ptr(*ls, spu_ls_target(0, op.i16));
	c->movdqa(loaded, ls_slot);
	c->pshufb(loaded, XmmConst(reverse_bytes));

	// Write the reordered quadword to rt.
	c->movdqa(SPU_OFF_128(gpr[op.rt]), loaded);
}
|
|
|
|
void spu_recompiler::BRASL(spu_opcode_t op)
{
	// Destination is computed from base 0 and the 16-bit immediate (not from m_pos).
	const u32 dest = spu_branch_target(0, op.i16);

	if (dest == m_pos)
	{
		throw EXCEPTION("Branch-to-self (0x%05x)", dest);
	}

	// rt receives spu_branch_target(m_pos + 4) in its highest 32-bit element; the other elements are zero.
	const XmmLink& link = XmmAlloc();
	const __m128i link_value = _mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0);
	c->movdqa(link, XmmConst(link_value));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), link);
	c->unuse(link);

	// Record the destination in pc, then emit the call sequence.
	c->mov(SPU_OFF_32(pc), dest);

	FunctionCall();
}
|
|
|
|
void spu_recompiler::BR(spu_opcode_t op)
{
	// Emits an unconditional branch to the destination computed from m_pos and the 16-bit immediate.
	const u32 target = spu_branch_target(m_pos, op.i16);

	if (target == m_pos)
	{
		// Branch-to-self: rather than emitting a loop, OR CPU_STATE_RETURN | CPU_STATE_STOPPED into
		// m_state and leave through the end label with (target | 0x2000000) in addr.
		c->mov(*addr, target | 0x2000000);
		//c->cmp(asmjit::host::dword_ptr(*ls, m_pos), 0x32); // compare instruction opcode with BR-to-self
		//c->je(labels[target / 4]);
		c->lock().or_(SPU_OFF_64(m_state), CPU_STATE_RETURN | CPU_STATE_STOPPED);
		c->jmp(*end);
		c->unuse(*addr);
		return;
	}

	if (labels[target / 4].isInitialized())
	{
		// A label exists for the destination: jump to it directly.
		c->jmp(labels[target / 4]);
	}
	else
	{
		if (target >= m_func->addr && target < m_func->addr + m_func->size)
		{
			// The message names this opcode ("br"); it previously read "brz", which pointed at the wrong handler.
			LOG_ERROR(SPU, "Local block not registered (br 0x%x)", target);
		}

		// No label: place the destination in addr and leave through the end label.
		c->mov(*addr, target);
		c->jmp(*end);
		c->unuse(*addr);
	}
}
|
|
|
|
void spu_recompiler::FSMBI(spu_opcode_t op)
{
	// rt receives the g_spu_imm.fsmb table entry selected by the 16-bit immediate.
	const XmmLink& result = XmmAlloc();
	c->movdqa(result, XmmConst(g_spu_imm.fsmb[op.i16]));

	const auto rt_slot = SPU_OFF_128(gpr[op.rt]);
	c->movdqa(rt_slot, result);
}
|
|
|
|
void spu_recompiler::BRSL(spu_opcode_t op)
{
	// Destination is derived from the current position and the 16-bit immediate.
	const u32 dest = spu_branch_target(m_pos, op.i16);

	if (dest == m_pos)
	{
		throw EXCEPTION("Branch-to-self (0x%05x)", dest);
	}

	// rt receives spu_branch_target(m_pos + 4) in its highest 32-bit element; the other elements are zero.
	const XmmLink& link = XmmAlloc();
	const __m128i link_value = _mm_set_epi32(spu_branch_target(m_pos + 4), 0, 0, 0);
	c->movdqa(link, XmmConst(link_value));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), link);
	c->unuse(link);

	// When the destination is simply the next instruction (branch-to-next), only the link write is needed.
	if (dest != spu_branch_target(m_pos + 4))
	{
		c->mov(SPU_OFF_32(pc), dest);

		FunctionCall();
	}
}
|
|
|
|
void spu_recompiler::LQR(spu_opcode_t op)
{
	// Shuffle control that reverses the order of all 16 bytes after the load.
	const __m128i reverse_bytes = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);

	const XmmLink& loaded = XmmAlloc();

	// Load from the ls-relative location computed from m_pos and the 16-bit immediate.
	const auto ls_slot = asmjit::host::oword_ptr(*ls, spu_ls_target(m_pos, op.i16));
	c->movdqa(loaded, ls_slot);
	c->pshufb(loaded, XmmConst(reverse_bytes));

	// Write the reordered quadword to rt.
	c->movdqa(SPU_OFF_128(gpr[op.rt]), loaded);
}
|
|
|
|
void spu_recompiler::IL(spu_opcode_t op)
{
	// Every 32-bit lane of rt is set to op.si16.
	const __m128i lanes = _mm_set1_epi32(op.si16);

	const XmmLink& result = XmmAlloc();
	c->movdqa(result, XmmConst(lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), result);
}
|
|
|
|
void spu_recompiler::ILHU(spu_opcode_t op)
{
	// Every 32-bit lane of rt is set to the 16-bit immediate shifted into the upper half.
	const __m128i lanes = _mm_set1_epi32(op.i16 << 16);

	const XmmLink& result = XmmAlloc();
	c->movdqa(result, XmmConst(lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), result);
}
|
|
|
|
void spu_recompiler::ILH(spu_opcode_t op)
{
	// Every 16-bit lane of rt is set to the 16-bit immediate.
	const __m128i lanes = _mm_set1_epi16(op.i16);

	const XmmLink& result = XmmAlloc();
	c->movdqa(result, XmmConst(lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), result);
}
|
|
|
|
void spu_recompiler::IOHL(spu_opcode_t op)
{
	// OR the 16-bit immediate into every 32-bit lane of rt, in place.
	const __m128i imm_lanes = _mm_set1_epi32(op.i16);

	const XmmLink& current = XmmGet(op.rt, XmmType::Int);
	c->por(current, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), current);
}
|
|
|
|
void spu_recompiler::ORI(spu_opcode_t op)
{
	const XmmLink& src = XmmGet(op.ra, XmmType::Int);

	if (op.si10)
	{
		// Only emit the OR for a non-zero immediate; with zero this is a plain copy of ra into rt.
		const __m128i imm_lanes = _mm_set1_epi32(op.si10);
		c->por(src, XmmConst(imm_lanes));
	}

	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::ORHI(spu_opcode_t op)
{
	// rt = ra | imm, with op.si10 replicated into every 16-bit lane.
	const __m128i imm_lanes = _mm_set1_epi16(op.si10);

	const XmmLink& src = XmmGet(op.ra, XmmType::Int);
	c->por(src, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::ORBI(spu_opcode_t op)
{
	// rt = ra | imm, with op.si10 replicated into every 8-bit lane.
	const __m128i imm_lanes = _mm_set1_epi8(op.si10);

	const XmmLink& src = XmmGet(op.ra, XmmType::Int);
	c->por(src, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::SFI(spu_opcode_t op)
{
	// rt = imm - ra per 32-bit lane: start from the replicated immediate, then subtract ra from it.
	const __m128i imm_lanes = _mm_set1_epi32(op.si10);

	const XmmLink& diff = XmmAlloc();
	c->movdqa(diff, XmmConst(imm_lanes));
	c->psubd(diff, SPU_OFF_128(gpr[op.ra]));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), diff);
}
|
|
|
|
void spu_recompiler::SFHI(spu_opcode_t op)
{
	// rt = imm - ra per 16-bit lane: start from the replicated immediate, then subtract ra from it.
	const __m128i imm_lanes = _mm_set1_epi16(op.si10);

	const XmmLink& diff = XmmAlloc();
	c->movdqa(diff, XmmConst(imm_lanes));
	c->psubw(diff, SPU_OFF_128(gpr[op.ra]));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), diff);
}
|
|
|
|
void spu_recompiler::ANDI(spu_opcode_t op)
{
	// rt = ra & imm, with op.si10 replicated into every 32-bit lane.
	const __m128i imm_lanes = _mm_set1_epi32(op.si10);

	const XmmLink& src = XmmGet(op.ra, XmmType::Int);
	c->pand(src, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::ANDHI(spu_opcode_t op)
{
	// rt = ra & imm, with op.si10 replicated into every 16-bit lane.
	const __m128i imm_lanes = _mm_set1_epi16(op.si10);

	const XmmLink& src = XmmGet(op.ra, XmmType::Int);
	c->pand(src, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::ANDBI(spu_opcode_t op)
{
	// rt = ra & imm, with op.si10 replicated into every 8-bit lane.
	const __m128i imm_lanes = _mm_set1_epi8(op.si10);

	const XmmLink& src = XmmGet(op.ra, XmmType::Int);
	c->pand(src, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::AI(spu_opcode_t op)
{
	// rt = ra + imm, with op.si10 replicated into every 32-bit lane.
	const __m128i imm_lanes = _mm_set1_epi32(op.si10);

	const XmmLink& sum = XmmGet(op.ra, XmmType::Int);
	c->paddd(sum, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), sum);
}
|
|
|
|
void spu_recompiler::AHI(spu_opcode_t op)
{
	// rt = ra + imm, with op.si10 replicated into every 16-bit lane.
	const __m128i imm_lanes = _mm_set1_epi16(op.si10);

	const XmmLink& sum = XmmGet(op.ra, XmmType::Int);
	c->paddw(sum, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), sum);
}
|
|
|
|
void spu_recompiler::STQD(spu_opcode_t op)
{
	// Stores rt (with its 16 bytes reversed) to ls at (ra._u32[3] + si10 * 16) & 0x3fff0.

	// Address base: 32-bit element 3 of ra.
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));

	// Scale the immediate by 16 with a multiplication: the immediate is signed, and left-shifting
	// a negative value is undefined behaviour before C++20.
	if (op.si10) c->add(*addr, op.si10 * 16);

	// Mask the address with 0x3fff0 (clears the low four bits and everything above bit 17).
	c->and_(*addr, 0x3fff0);

	const XmmLink& vt = XmmGet(op.rt, XmmType::Int);

	// Reverse the order of all 16 bytes before writing to ls.
	c->pshufb(vt, XmmConst(_mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f)));
	c->movdqa(asmjit::host::oword_ptr(*ls, *addr), vt);
	c->unuse(*addr);
}
|
|
|
|
void spu_recompiler::LQD(spu_opcode_t op)
{
	// Loads rt (with its 16 bytes reversed) from ls at (ra._u32[3] + si10 * 16) & 0x3fff0.

	// Address base: 32-bit element 3 of ra.
	c->mov(*addr, SPU_OFF_32(gpr[op.ra]._u32[3]));

	// Scale the immediate by 16 with a multiplication: the immediate is signed, and left-shifting
	// a negative value is undefined behaviour before C++20.
	if (op.si10) c->add(*addr, op.si10 * 16);

	// Mask the address with 0x3fff0 (clears the low four bits and everything above bit 17).
	c->and_(*addr, 0x3fff0);

	const XmmLink& vt = XmmAlloc();
	c->movdqa(vt, asmjit::host::oword_ptr(*ls, *addr));

	// Reverse the order of all 16 bytes before writing the register.
	c->pshufb(vt, XmmConst(_mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f)));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), vt);
	c->unuse(*addr);
}
|
|
|
|
void spu_recompiler::XORI(spu_opcode_t op)
{
	// rt = ra ^ imm, with op.si10 replicated into every 32-bit lane.
	const __m128i imm_lanes = _mm_set1_epi32(op.si10);

	const XmmLink& src = XmmGet(op.ra, XmmType::Int);
	c->pxor(src, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::XORHI(spu_opcode_t op)
{
	// rt = ra ^ imm, with op.si10 replicated into every 16-bit lane.
	const __m128i imm_lanes = _mm_set1_epi16(op.si10);

	const XmmLink& src = XmmGet(op.ra, XmmType::Int);
	c->pxor(src, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::XORBI(spu_opcode_t op)
{
	// rt = ra ^ imm, with op.si10 replicated into every 8-bit lane.
	const __m128i imm_lanes = _mm_set1_epi8(op.si10);

	const XmmLink& src = XmmGet(op.ra, XmmType::Int);
	c->pxor(src, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), src);
}
|
|
|
|
void spu_recompiler::CGTI(spu_opcode_t op)
{
	// Signed greater-than of each 32-bit lane of ra against op.si10; the lane mask goes to rt.
	const __m128i imm_lanes = _mm_set1_epi32(op.si10);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->pcmpgtd(lhs, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::CGTHI(spu_opcode_t op)
{
	// Signed greater-than of each 16-bit lane of ra against op.si10; the lane mask goes to rt.
	const __m128i imm_lanes = _mm_set1_epi16(op.si10);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->pcmpgtw(lhs, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::CGTBI(spu_opcode_t op)
{
	// Signed greater-than of each 8-bit lane of ra against op.si10; the lane mask goes to rt.
	const __m128i imm_lanes = _mm_set1_epi8(op.si10);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->pcmpgtb(lhs, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::HGTI(spu_opcode_t op)
{
	// Compare 32-bit element 3 of ra (read through the signed view) with op.si10.
	const auto lhs_word = SPU_OFF_32(gpr[op.ra]._s32[3]);
	c->mov(*addr, lhs_word);
	c->cmp(*addr, op.si10);

	// Load the exit value first (mov leaves the flags intact), then take the end label on signed greater-than.
	const auto exit_value = m_pos | 0x1000000;
	c->mov(*addr, exit_value);
	c->jg(*end);
	c->unuse(*addr);
}
|
|
|
|
void spu_recompiler::CLGTI(spu_opcode_t op)
{
	// Unsigned greater-than built on the signed pcmpgtd: both sides are biased by 0x80000000 first.
	const __m128i bias = _mm_set1_epi32(0x80000000);
	const __m128i biased_imm = _mm_set1_epi32(op.si10 - 0x80000000);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->pxor(lhs, XmmConst(bias));
	c->pcmpgtd(lhs, XmmConst(biased_imm));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::CLGTHI(spu_opcode_t op)
{
	// Unsigned greater-than built on the signed pcmpgtw: both sides are biased by 0x8000 first.
	const __m128i bias = _mm_set1_epi16(INT16_MIN);
	const __m128i biased_imm = _mm_set1_epi16(op.si10 - 0x8000);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->pxor(lhs, XmmConst(bias));
	c->pcmpgtw(lhs, XmmConst(biased_imm));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::CLGTBI(spu_opcode_t op)
{
	// Unsigned greater-than built on the signed pcmpgtb: both sides are biased by 0x80 first
	// (the register side via a byte-wise subtraction of INT8_MIN).
	const __m128i bias = _mm_set1_epi8(INT8_MIN);
	const __m128i biased_imm = _mm_set1_epi8(op.si10 - 0x80);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->psubb(lhs, XmmConst(bias));
	c->pcmpgtb(lhs, XmmConst(biased_imm));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::HLGTI(spu_opcode_t op)
{
	// Compare 32-bit element 3 of ra with op.si10.
	const auto lhs_word = SPU_OFF_32(gpr[op.ra]._u32[3]);
	c->mov(*addr, lhs_word);
	c->cmp(*addr, op.si10);

	// Load the exit value first (mov leaves the flags intact), then take the end label on unsigned above.
	const auto exit_value = m_pos | 0x1000000;
	c->mov(*addr, exit_value);
	c->ja(*end);
	c->unuse(*addr);
}
|
|
|
|
void spu_recompiler::MPYI(spu_opcode_t op)
{
	// The multiplier keeps the immediate in the low halfword of each 32-bit lane and zero in the
	// high halfword, so pmaddwd yields only the low-halfword product per lane.
	const __m128i multiplier = _mm_set1_epi32(op.si10 & 0xffff);

	const XmmLink& product = XmmGet(op.ra, XmmType::Int);
	c->pmaddwd(product, XmmConst(multiplier));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), product);
}
|
|
|
|
void spu_recompiler::MPYUI(spu_opcode_t op)
{
	// The multiplier keeps the immediate in the low halfword of each 32-bit lane and zero in the high halfword.
	const __m128i multiplier = _mm_set1_epi32(op.si10 & 0xffff);

	const XmmLink& hi_part = XmmGet(op.ra, XmmType::Int);
	const XmmLink& imm = XmmAlloc();
	const XmmLink& lo_part = XmmAlloc();

	c->movdqa(lo_part, hi_part);
	c->movdqa(imm, XmmConst(multiplier));

	// Unsigned 16x16 multiply split in two: upper 16 bits of each product, then lower 16 bits.
	c->pmulhuw(hi_part, imm);
	c->pmullw(lo_part, imm);

	// Move the upper halves into place and merge them with the lower halves.
	c->pslld(hi_part, 16);
	c->por(hi_part, lo_part);

	c->movdqa(SPU_OFF_128(gpr[op.rt]), hi_part);
}
|
|
|
|
void spu_recompiler::CEQI(spu_opcode_t op)
{
	// Equality of each 32-bit lane of ra against op.si10; the lane mask goes to rt.
	const __m128i imm_lanes = _mm_set1_epi32(op.si10);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->pcmpeqd(lhs, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::CEQHI(spu_opcode_t op)
{
	// Equality of each 16-bit lane of ra against op.si10; the lane mask goes to rt.
	const __m128i imm_lanes = _mm_set1_epi16(op.si10);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->pcmpeqw(lhs, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::CEQBI(spu_opcode_t op)
{
	// Equality of each 8-bit lane of ra against op.si10; the lane mask goes to rt.
	const __m128i imm_lanes = _mm_set1_epi8(op.si10);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	c->pcmpeqb(lhs, XmmConst(imm_lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), lhs);
}
|
|
|
|
void spu_recompiler::HEQI(spu_opcode_t op)
{
	// Compare 32-bit element 3 of ra with op.si10.
	const auto lhs_word = SPU_OFF_32(gpr[op.ra]._u32[3]);
	c->mov(*addr, lhs_word);
	c->cmp(*addr, op.si10);

	// Load the exit value first (mov leaves the flags intact), then take the end label on equality.
	const auto exit_value = m_pos | 0x1000000;
	c->mov(*addr, exit_value);
	c->je(*end);
	c->unuse(*addr);
}
|
|
|
|
void spu_recompiler::HBRA(spu_opcode_t op)
{
	// Deliberately empty: no host code is emitted for this opcode.
	(void)op;
}
|
|
|
|
void spu_recompiler::HBRR(spu_opcode_t op)
{
	// Deliberately empty: no host code is emitted for this opcode.
	(void)op;
}
|
|
|
|
void spu_recompiler::ILA(spu_opcode_t op)
{
	// Every 32-bit lane of rt is set to the 18-bit immediate.
	const __m128i lanes = _mm_set1_epi32(op.i18);

	const XmmLink& result = XmmAlloc();
	c->movdqa(result, XmmConst(lanes));
	c->movdqa(SPU_OFF_128(gpr[op.rt]), result);
}
|
|
|
|
void spu_recompiler::SELB(spu_opcode_t op)
{
	// Bitwise select: rt4 = (rb & rc) | (ra & ~rc).
	const XmmLink& picked = XmmGet(op.rb, XmmType::Int);
	const XmmLink& selector = XmmGet(op.rc, XmmType::Int);

	// Bits taken from rb where the selector is set.
	c->pand(picked, selector);

	// Bits taken from ra where the selector is clear (pandn inverts its destination operand).
	c->pandn(selector, SPU_OFF_128(gpr[op.ra]));

	c->por(picked, selector);

	const auto rt_slot = SPU_OFF_128(gpr[op.rt4]);
	c->movdqa(rt_slot, picked);
}
|
|
|
|
void spu_recompiler::SHUFB(spu_opcode_t op)
{
	// Per-byte constants used below.
	const __m128i k_top3 = _mm_set1_epi8(-0x20);   // 11100000
	const __m128i k_top1 = _mm_set1_epi8(-0x80);   // 10000000
	const __m128i k_top2 = _mm_set1_epi8(-0x40);   // 11000000
	const __m128i k_low4 = _mm_set1_epi8(0x0f);    // 00001111
	const __m128i k_bit4 = _mm_set1_epi8(0x10);    // 00010000
	const __m128i k_high4 = _mm_set1_epi8(-0x10);  // 11110000

	const XmmLink& mask = XmmGet(op.rc, XmmType::Int); // control bytes from rc
	const XmmLink& tmpA = XmmAlloc();
	const XmmLink& tmpB = XmmAlloc();
	const XmmLink& tmpC = XmmAlloc();
	const XmmLink& tmpD = XmmAlloc();
	const XmmLink& special = XmmAlloc();

	// Part 1: classify each control byte by its top bits and build the constant output bytes.
	c->movdqa(tmpB, mask);              // tmpB = mask
	c->movdqa(tmpA, XmmConst(k_top3));  // tmpA = 11100000
	c->movdqa(tmpC, XmmConst(k_top1));  // tmpC = 10000000
	c->pand(tmpB, tmpA);                // tmpB = mask & 11100000
	c->movdqa(special, tmpB);           // special = mask & 11100000
	c->movdqa(tmpD, XmmConst(k_top2));  // tmpD = 11000000
	c->pcmpeqb(special, tmpD);          // special = 0xff where the top three bits are 110
	c->movdqa(tmpD, tmpB);              // tmpD = mask & 11100000
	c->pand(tmpD, tmpC);                // tmpD = mask & 10000000
	c->pcmpeqb(tmpB, tmpA);             // tmpB = 0xff where the top three bits are 111
	c->pcmpeqb(tmpD, tmpC);             // tmpD = 0xff where the top bit is set
	c->pand(tmpB, tmpC);                // tmpB = 0x80 where the top three bits are 111
	c->por(special, tmpB);              // special = 0xff (110xxxxx), 0x80 (111xxxxx), otherwise 0
	c->pandn(tmpA, mask);               // tmpA = mask & 00011111

	// Part 2: gather bytes from rb.
	c->movdqa(tmpB, XmmConst(k_low4));              // tmpB = 00001111
	c->pxor(tmpA, XmmConst(k_bit4));                // tmpA = (mask & 00011111) ^ 00010000
	c->psubb(tmpB, tmpA);                           // tmpB = 00001111 - tmpA
	c->movdqa(tmpA, SPU_OFF_128(gpr[op.rb]));       // tmpA = rb
	c->pshufb(tmpA, tmpB);                          // tmpA = bytes of rb picked by tmpB

	// Part 3: gather bytes from ra with the high nibble of the index flipped, then combine.
	c->pxor(tmpB, XmmConst(k_high4));               // tmpB ^= 11110000
	c->movdqa(tmpC, SPU_OFF_128(gpr[op.ra]));       // tmpC = ra
	c->pshufb(tmpC, tmpB);                          // tmpC = bytes of ra picked by tmpB
	c->por(tmpA, tmpC);                             // tmpA = rb selection | ra selection

	// Part 4: keep gathered bytes only where the control byte's top bit is clear, then merge the constants.
	c->pandn(tmpD, tmpA);                           // tmpD = tmpA where the top bit is clear, otherwise 0
	c->por(special, tmpD);                          // special = final result
	c->movdqa(SPU_OFF_128(gpr[op.rt4]), special);
}
|
|
|
|
void spu_recompiler::MPYA(spu_opcode_t op)
{
	// Keeps only the low halfword of each 32-bit lane.
	const __m128i low_half = _mm_set1_epi32(0xffff);

	const XmmLink& lhs = XmmGet(op.ra, XmmType::Int);
	const XmmLink& rhs = XmmGet(op.rb, XmmType::Int);
	const XmmLink& keep = XmmAlloc();

	c->movdqa(keep, XmmConst(low_half));
	c->pand(lhs, keep);
	c->pand(rhs, keep);

	// With the high halfwords cleared, pmaddwd yields the low-halfword product per lane.
	c->pmaddwd(lhs, rhs);

	// Add rc and write the sum to rt4.
	c->paddd(lhs, SPU_OFF_128(gpr[op.rc]));
	c->movdqa(SPU_OFF_128(gpr[op.rt4]), lhs);
}
|
|
|
|
void spu_recompiler::FNMS(spu_opcode_t op)
{
	// rt4 = rc - ra * rb, on packed single-precision floats.
	const XmmLink& product = XmmGet(op.ra, XmmType::Float);
	const XmmLink& minuend = XmmGet(op.rc, XmmType::Float);

	c->mulps(product, SPU_OFF_128(gpr[op.rb]));
	c->subps(minuend, product);

	const auto rt_slot = SPU_OFF_128(gpr[op.rt4]);
	c->movaps(rt_slot, minuend);
}
|
|
|
|
void spu_recompiler::FMA(spu_opcode_t op)
{
	// rt4 = ra * rb + rc, on packed single-precision floats (separate multiply and add).
	const XmmLink& acc = XmmGet(op.ra, XmmType::Float);

	c->mulps(acc, SPU_OFF_128(gpr[op.rb]));
	c->addps(acc, SPU_OFF_128(gpr[op.rc]));

	const auto rt_slot = SPU_OFF_128(gpr[op.rt4]);
	c->movaps(rt_slot, acc);
}
|
|
|
|
void spu_recompiler::FMS(spu_opcode_t op)
{
	// rt4 = ra * rb - rc, on packed single-precision floats (separate multiply and subtract).
	const XmmLink& acc = XmmGet(op.ra, XmmType::Float);

	c->mulps(acc, SPU_OFF_128(gpr[op.rb]));
	c->subps(acc, SPU_OFF_128(gpr[op.rc]));

	const auto rt_slot = SPU_OFF_128(gpr[op.rt4]);
	c->movaps(rt_slot, acc);
}
|
|
|
|
void spu_recompiler::UNK(spu_opcode_t op)
{
	// Always throws, reporting the raw opcode word.
	const auto raw_word = op.opcode;

	throw EXCEPTION("Unknown/Illegal opcode (0x%08x)", raw_word);
}
|
|
|
|
// Table of spu_recompiler member-function handlers, initialised from the DEFINE_SPU_OPCODES list
// with &spu_recompiler::UNK as the final initializer.
// NOTE(review): UNK is presumably the fallback for encodings without a handler - confirm against spu_opcode_table_t.
const spu_opcode_table_t<void(spu_recompiler::*)(spu_opcode_t)> spu_recompiler::opcodes{ DEFINE_SPU_OPCODES(&spu_recompiler::), &spu_recompiler::UNK };
|