mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 14:31:24 +12:00
SPU LLVM: Initial precompilation of tail-calls
This commit is contained in:
parent
a9810ccb72
commit
a626ccfcad
4 changed files with 124 additions and 15 deletions
|
@ -14,12 +14,13 @@ struct spu_itype
|
||||||
static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions
|
static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions
|
||||||
static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions
|
static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions
|
||||||
static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values
|
static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values
|
||||||
|
static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR
|
||||||
|
|
||||||
enum type : unsigned char
|
enum type : unsigned char
|
||||||
{
|
{
|
||||||
UNK = 0,
|
UNK = 0,
|
||||||
|
|
||||||
HEQ,
|
HEQ, // zregmod_tag first
|
||||||
HEQI,
|
HEQI,
|
||||||
HGT,
|
HGT,
|
||||||
HGTI,
|
HGTI,
|
||||||
|
@ -36,11 +37,21 @@ struct spu_itype
|
||||||
NOP,
|
NOP,
|
||||||
SYNC,
|
SYNC,
|
||||||
DSYNC,
|
DSYNC,
|
||||||
MFSPR,
|
|
||||||
MTSPR,
|
MTSPR,
|
||||||
|
WRCH,
|
||||||
|
|
||||||
|
STQD, // memory_tag first
|
||||||
|
STQX,
|
||||||
|
STQA,
|
||||||
|
STQR, // zregmod_tag last
|
||||||
|
LQD,
|
||||||
|
LQX,
|
||||||
|
LQA,
|
||||||
|
LQR, // memory_tag last
|
||||||
|
|
||||||
|
MFSPR,
|
||||||
RDCH,
|
RDCH,
|
||||||
RCHCNT,
|
RCHCNT,
|
||||||
WRCH,
|
|
||||||
|
|
||||||
BR, // branch_tag first
|
BR, // branch_tag first
|
||||||
BRA,
|
BRA,
|
||||||
|
@ -59,15 +70,6 @@ struct spu_itype
|
||||||
BIHZ,
|
BIHZ,
|
||||||
BIHNZ, // branch_tag last
|
BIHNZ, // branch_tag last
|
||||||
|
|
||||||
LQD, // memory_tag first
|
|
||||||
LQX,
|
|
||||||
LQA,
|
|
||||||
LQR,
|
|
||||||
STQD,
|
|
||||||
STQX,
|
|
||||||
STQA,
|
|
||||||
STQR, // memory_tag last
|
|
||||||
|
|
||||||
ILH, // constant_tag_first
|
ILH, // constant_tag_first
|
||||||
ILHU,
|
ILHU,
|
||||||
IL,
|
IL,
|
||||||
|
@ -267,7 +269,7 @@ struct spu_itype
|
||||||
// Test for memory instruction
|
// Test for memory instruction
|
||||||
friend constexpr bool operator &(type value, memory_tag)
|
friend constexpr bool operator &(type value, memory_tag)
|
||||||
{
|
{
|
||||||
// Old side of the diff: bounds for the previous enum order (LQD marked first, STQR marked last)
return value >= LQD && value <= STQR;
|
// New side of the diff: the store entries now precede the loads in the enum, so the range runs STQD..LQR
return value >= STQD && value <= LQR;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test for compare instruction
|
// Test for compare instruction
|
||||||
|
@ -293,6 +295,12 @@ struct spu_itype
|
||||||
{
|
{
|
||||||
return value >= ILH && value <= FSMBI;
|
return value >= ILH && value <= FSMBI;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test for non register-modifying instruction
|
||||||
|
friend constexpr bool operator &(type value, zregmod_tag)
|
||||||
|
{
|
||||||
|
return value >= HEQ && value <= STQR;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct spu_iflag
|
struct spu_iflag
|
||||||
|
|
|
@ -851,6 +851,13 @@ public:
|
||||||
}
|
}
|
||||||
// Disassemble BR: emits the "br" mnemonic with the branch target built from op.i16
void BR(spu_opcode_t op)
|
void BR(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
|
// New side of the diff only: an rt field that is neither 0 nor 127 is reported as "br??" instead of "br"
if (op.rt && op.rt != 127u)
|
||||||
|
{
|
||||||
|
// Valid but makes no sense
|
||||||
|
DisAsm("br??", DisAsmBranchTarget(op.i16));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Regular form (present on both sides of the diff)
DisAsm("br", DisAsmBranchTarget(op.i16));
|
DisAsm("br", DisAsmBranchTarget(op.i16));
|
||||||
}
|
}
|
||||||
void FSMBI(spu_opcode_t op)
|
void FSMBI(spu_opcode_t op)
|
||||||
|
|
|
@ -2091,21 +2091,25 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/
|
||||||
std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/)
|
std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/)
|
||||||
{
|
{
|
||||||
std::vector<u32> calls;
|
std::vector<u32> calls;
|
||||||
|
std::vector<u32> branches;
|
||||||
|
|
||||||
calls.reserve(100);
|
calls.reserve(100);
|
||||||
|
|
||||||
// Discover functions
|
// Discover functions
|
||||||
// Use the most simple method: search for instructions that calls them
|
// Use the most simple method: search for instructions that calls them
|
||||||
// And then filter invalid cases (does not detect tail calls)
|
// And then filter invalid cases
|
||||||
|
// TODO: Does not detect jumptables or fixed-addr indirect calls
|
||||||
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
|
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
|
||||||
|
|
||||||
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
|
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
|
||||||
{
|
{
|
||||||
// Search for BRSL LR and BRASL LR
|
// Search for BRSL LR and BRASL LR or BR
|
||||||
// TODO: BISL
|
// TODO: BISL
|
||||||
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
|
||||||
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
|
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
|
||||||
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
|
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
|
||||||
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
|
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
|
||||||
|
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
|
||||||
const v128 result = eq_brsl | eq_brasl;
|
const v128 result = eq_brsl | eq_brasl;
|
||||||
|
|
||||||
if (!gv_testz(result))
|
if (!gv_testz(result))
|
||||||
|
@ -2118,6 +2122,17 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!gv_testz(eq_br))
|
||||||
|
{
|
||||||
|
for (u32 j = 0; j < 4; j++)
|
||||||
|
{
|
||||||
|
if (eq_br.u32r[j])
|
||||||
|
{
|
||||||
|
branches.push_back(i + j * 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller)
|
calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller)
|
||||||
|
@ -2126,6 +2141,12 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||||
return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr);
|
return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr);
|
||||||
}), calls.end());
|
}), calls.end());
|
||||||
|
|
||||||
|
branches.erase(std::remove_if(branches.begin(), branches.end(), [&](u32 caller)
|
||||||
|
{
|
||||||
|
// Check the validity of the callee code
|
||||||
|
return !is_exec_code(caller, ls, base_addr);
|
||||||
|
}), branches.end());
|
||||||
|
|
||||||
std::vector<u32> addrs;
|
std::vector<u32> addrs;
|
||||||
|
|
||||||
for (u32 addr : calls)
|
for (u32 addr : calls)
|
||||||
|
@ -2142,6 +2163,69 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
|
||||||
addrs.push_back(func);
|
addrs.push_back(func);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tail-call discovery: for every surviving BR candidate, inspect the few instructions
// preceding it and register the branch target as a function entry if they look like
// a function epilogue.
for (u32 addr : branches)
{
	const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};

	const u32 func = op_branch_targets(addr, op)[0];

	// Skip unresolved targets, branches to the next instruction, self-branches and a branch located at address 0
	if (func == umax || addr + 4 == func || func == addr || !addr)
	{
		continue;
	}

	// Search for AI R1, +x (OR R3/4, Rx, 0 is currently disabled, see below)
	// Reasoning: AI R1, +x means stack pointer restoration, branch after that is likely a tail call
	// R3 and R4 are common function arguments because they are the first two
	//
	// The scan is limited to the 5 instructions preceding the branch: `it` counts them down.
	// `back < addr` terminates the scan if `back` wraps around below zero (possible when base_addr is 0),
	// which would otherwise pass the `back >= base_addr` test and read outside of `ls`.
	for (u32 back = addr - 4, it = 5; it && back >= base_addr && back < addr; it--, back -= 4)
	{
		const spu_opcode_t test_op{read_from_ptr<be_t<u32>>(ls, back - base_addr)};
		const auto type = g_spu_itype.decode(test_op.opcode);

		if (type & spu_itype::branch)
		{
			// Another branch precedes this one: stop looking back
			break;
		}

		bool is_tail = false;

		if (type == spu_itype::AI && test_op.rt == 1u && test_op.ra == 1u)
		{
			if (test_op.si10 <= 0)
			{
				// R1 is not being increased: not a stack pointer restoration
				break;
			}

			is_tail = true;
		}
		else if (!(type & spu_itype::zregmod))
		{
			// Destination register field: 4-op instructions keep it in rt4, the rest in rt
			const u32 op_rt = type & spu_itype::_quadrop ? +test_op.rt4 : +test_op.rt;

			if (op_rt >= 80u && (type != spu_itype::LQD || test_op.ra != 1u))
			{
				// Modifying non-volatile registers, not a call (and not context restoration)
				break;
			}

			//is_tail = op_rt == 3u || op_rt == 4u;
		}

		if (!is_tail)
		{
			// Inconclusive instruction: keep looking further back
			continue;
		}

		// Register the target once; either way the search for this branch is over
		if (std::find(addrs.begin(), addrs.end(), func) == addrs.end())
		{
			addrs.push_back(func);
		}

		break;
	}
}
|
||||||
|
|
||||||
std::sort(addrs.begin(), addrs.end());
|
std::sort(addrs.begin(), addrs.end());
|
||||||
|
|
||||||
return addrs;
|
return addrs;
|
||||||
|
|
|
@ -4042,8 +4042,18 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (type == spu_itype::STOP && op.rb)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (type & spu_itype::branch)
|
if (type & spu_itype::branch)
|
||||||
{
|
{
|
||||||
|
if (type == spu_itype::BR && op.rt && op.rt != 127u)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
const auto results = op_branch_targets(addr, spu_opcode_t{op});
|
const auto results = op_branch_targets(addr, spu_opcode_t{op});
|
||||||
|
|
||||||
if (results[0] == umax)
|
if (results[0] == umax)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue