SPU LLVM: Initial precompilation of tail-calls

This commit is contained in:
Eladash 2023-09-01 19:38:06 +03:00 committed by Elad Ashkenazi
parent a9810ccb72
commit a626ccfcad
4 changed files with 124 additions and 15 deletions

View file

@ -14,12 +14,13 @@ struct spu_itype
static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions static constexpr struct floating_tag{} floating{}; // Floating-Point Instructions
static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions static constexpr struct quadrop_tag{} _quadrop{}; // 4-op Instructions
static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values static constexpr struct xfloat_tag{} xfloat{}; // Instructions producing xfloat values
static constexpr struct zregmod_tag{} zregmod{}; // Instructions not modifying any GPR
enum type : unsigned char enum type : unsigned char
{ {
UNK = 0, UNK = 0,
HEQ, HEQ, // zregmod_tag first
HEQI, HEQI,
HGT, HGT,
HGTI, HGTI,
@ -36,11 +37,21 @@ struct spu_itype
NOP, NOP,
SYNC, SYNC,
DSYNC, DSYNC,
MFSPR,
MTSPR, MTSPR,
WRCH,
STQD, // memory_tag first
STQX,
STQA,
STQR, // zregmod_tag last
LQD,
LQX,
LQA,
LQR, // memory_tag last
MFSPR,
RDCH, RDCH,
RCHCNT, RCHCNT,
WRCH,
BR, // branch_tag first BR, // branch_tag first
BRA, BRA,
@ -59,15 +70,6 @@ struct spu_itype
BIHZ, BIHZ,
BIHNZ, // branch_tag last BIHNZ, // branch_tag last
LQD, // memory_tag first
LQX,
LQA,
LQR,
STQD,
STQX,
STQA,
STQR, // memory_tag last
ILH, // constant_tag_first ILH, // constant_tag_first
ILHU, ILHU,
IL, IL,
@ -267,7 +269,7 @@ struct spu_itype
// Test for memory instruction // Test for memory instruction
friend constexpr bool operator &(type value, memory_tag) friend constexpr bool operator &(type value, memory_tag)
{ {
return value >= LQD && value <= STQR; return value >= STQD && value <= LQR;
} }
// Test for compare instruction // Test for compare instruction
@ -293,6 +295,12 @@ struct spu_itype
{ {
return value >= ILH && value <= FSMBI; return value >= ILH && value <= FSMBI;
} }
// Test for an instruction that does not modify any GPR
// (enum values in the contiguous range HEQ..STQR)
friend constexpr bool operator &(type value, zregmod_tag)
{
	return !(value < HEQ || value > STQR);
}
}; };
struct spu_iflag struct spu_iflag

View file

@ -851,6 +851,13 @@ public:
} }
void BR(spu_opcode_t op) void BR(spu_opcode_t op)
{ {
if (op.rt && op.rt != 127u)
{
// Valid but makes no sense
DisAsm("br??", DisAsmBranchTarget(op.i16));
return;
}
DisAsm("br", DisAsmBranchTarget(op.i16)); DisAsm("br", DisAsmBranchTarget(op.i16));
} }
void FSMBI(spu_opcode_t op) void FSMBI(spu_opcode_t op)

View file

@ -2091,21 +2091,25 @@ void spu_recompiler_base::old_interpreter(spu_thread& spu, void* ls, u8* /*rip*/
std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/) std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u8> ls, bool is_known_addr, u32 /*entry*/)
{ {
std::vector<u32> calls; std::vector<u32> calls;
std::vector<u32> branches;
calls.reserve(100); calls.reserve(100);
// Discover functions // Discover functions
// Use the most simple method: search for instructions that calls them // Use the most simple method: search for instructions that calls them
// And then filter invalid cases (does not detect tail calls) // And then filter invalid cases
// TODO: Does not detect jumptables or fixed-addr indirect calls
const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax); const v128 brasl_mask = is_known_addr ? v128::from32p(0x62u << 23) : v128::from32p(umax);
for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10) for (u32 i = utils::align<u32>(base_addr, 0x10); i < std::min<u32>(base_addr + ls.size(), 0x3FFF0); i += 0x10)
{ {
// Search for BRSL LR and BRASL LR // Search for BRSL LR and BRASL LR or BR
// TODO: BISL // TODO: BISL
const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr); const v128 inst = read_from_ptr<be_t<v128>>(ls.data(), i - base_addr);
const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7))); const v128 cleared_i16 = gv_and32(inst, v128::from32p(utils::rol32(~0xffff, 7)));
const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23)); const v128 eq_brsl = gv_eq32(cleared_i16, v128::from32p(0x66u << 23));
const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask); const v128 eq_brasl = gv_eq32(cleared_i16, brasl_mask);
const v128 eq_br = gv_eq32(cleared_i16, v128::from32p(0x64u << 23));
const v128 result = eq_brsl | eq_brasl; const v128 result = eq_brsl | eq_brasl;
if (!gv_testz(result)) if (!gv_testz(result))
@ -2118,6 +2122,17 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
} }
} }
} }
// Record the address of every 32-bit word of this quadword that matched the BR pattern
if (!gv_testz(eq_br))
{
	for (u32 lane = 0; lane < 4; lane++)
	{
		if (!eq_br.u32r[lane])
		{
			// This word is not a BR candidate
			continue;
		}

		branches.push_back(i + lane * 4);
	}
}
} }
calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller) calls.erase(std::remove_if(calls.begin(), calls.end(), [&](u32 caller)
@ -2126,6 +2141,12 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr); return !is_exec_code(caller, ls, base_addr) || !is_exec_code(caller + 4, ls, base_addr);
}), calls.end()); }), calls.end());
branches.erase(std::remove_if(branches.begin(), branches.end(), [&](u32 caller)
{
// Check the validity of the callee code
return !is_exec_code(caller, ls, base_addr);
}), branches.end());
std::vector<u32> addrs; std::vector<u32> addrs;
for (u32 addr : calls) for (u32 addr : calls)
@ -2142,6 +2163,69 @@ std::vector<u32> spu_thread::discover_functions(u32 base_addr, std::span<const u
addrs.push_back(func); addrs.push_back(func);
} }
// Tail-call discovery: for every plausible BR instruction, inspect a few
// preceding instructions and register the branch target as a function entry
// when the code looks like an epilogue followed by a jump.
for (u32 addr : branches)
{
	const spu_opcode_t op{read_from_ptr<be_t<u32>>(ls, addr - base_addr)};

	const u32 func = op_branch_targets(addr, op)[0];

	// Skip unresolved targets, branches to the next instruction, self-branches and address 0
	if (func == umax || addr + 4 == func || func == addr || !addr)
	{
		continue;
	}

	// Search for AI R1, +x (the OR R3/4, Rx, 0 heuristic is currently disabled below)
	// Reasoning: AI R1, +x means stack pointer restoration, branch after that is likely a tail call
	// R3 and R4 are common function arguments because they are the first two
	//
	// The look-back is limited to 5 instructions: `it` must be decremented on every step
	// (including `continue`), otherwise the scan would run all the way back to base_addr.
	// `back < addr` stops the scan when `back -= 4` wraps around zero (base_addr == 0).
	for (u32 back = addr - 4, it = 5; it && back >= base_addr && back < addr; it--, back -= 4)
	{
		const spu_opcode_t test_op{read_from_ptr<be_t<u32>>(ls, back - base_addr)};
		const auto type = g_spu_itype.decode(test_op.opcode);

		if (type & spu_itype::branch)
		{
			// Another branch in between: the BR does not directly follow an epilogue
			break;
		}

		bool is_tail = false;

		if (type == spu_itype::AI && test_op.rt == 1u && test_op.ra == 1u)
		{
			if (test_op.si10 <= 0)
			{
				// R1 is not being increased: not a stack pointer restoration
				break;
			}

			is_tail = true;
		}
		else if (!(type & spu_itype::zregmod))
		{
			// Instruction writes a register: find out which one
			const u32 op_rt = (type & spu_itype::_quadrop) ? +test_op.rt4 : +test_op.rt;

			if (op_rt >= 80u && (type != spu_itype::LQD || test_op.ra != 1u))
			{
				// Modifying non-volatile registers, not a call (and not context restoration)
				break;
			}

			//is_tail = op_rt == 3u || op_rt == 4u;
		}

		if (!is_tail)
		{
			continue;
		}

		// Register the target once; either way the search for this branch is finished
		if (std::find(addrs.begin(), addrs.end(), func) == addrs.end())
		{
			addrs.push_back(func);
		}

		break;
	}
}
std::sort(addrs.begin(), addrs.end()); std::sort(addrs.begin(), addrs.end());
return addrs; return addrs;

View file

@ -4042,8 +4042,18 @@ bool spu_thread::is_exec_code(u32 addr, std::span<const u8> ls_ptr, u32 base_add
return false; return false;
} }
if (type == spu_itype::STOP && op.rb)
{
return false;
}
if (type & spu_itype::branch) if (type & spu_itype::branch)
{ {
if (type == spu_itype::BR && op.rt && op.rt != 127u)
{
return false;
}
const auto results = op_branch_targets(addr, spu_opcode_t{op}); const auto results = op_branch_targets(addr, spu_opcode_t{op});
if (results[0] == umax) if (results[0] == umax)