mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-13 10:18:40 +12:00
SPU LLVM: Optimize branches following byteswaps
- The first element can be extracted via vmovd rather than vpextrd, which saves 1 uop.
This commit is contained in:
parent
f9ab077908
commit
86716dc37b
1 changed files with 59 additions and 1 deletions
|
@ -9232,6 +9232,20 @@ public:
|
||||||
{
|
{
|
||||||
if (m_block) m_block->block_end = m_ir->GetInsertBlock();
|
if (m_block) m_block->block_end = m_ir->GetInsertBlock();
|
||||||
|
|
||||||
|
const auto rt = get_vr<u8[16]>(op.rt);
|
||||||
|
|
||||||
|
// Checking for zero doesn't care about the order of the bytes,
|
||||||
|
// so load the data before it's byteswapped
|
||||||
|
if (auto [ok, as] = match_expr(rt, byteswap(match<u8[16]>())); ok)
|
||||||
|
{
|
||||||
|
m_block->block_end = m_ir->GetInsertBlock();
|
||||||
|
const auto cond = eval(extract(bitcast<u32[4]>(as), 0) == 0);
|
||||||
|
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
|
||||||
|
const auto target = add_block_indirect(op, addr);
|
||||||
|
m_ir->CreateCondBr(cond.value, target, add_block_next());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Check sign bit instead (optimization)
|
// Check sign bit instead (optimization)
|
||||||
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
|
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
|
||||||
{
|
{
|
||||||
|
@ -9263,6 +9277,21 @@ public:
|
||||||
{
|
{
|
||||||
if (m_block) m_block->block_end = m_ir->GetInsertBlock();
|
if (m_block) m_block->block_end = m_ir->GetInsertBlock();
|
||||||
|
|
||||||
|
const auto rt = get_vr<u8[16]>(op.rt);
|
||||||
|
|
||||||
|
// Checking for zero doesn't care about the order of the bytes,
|
||||||
|
// so load the data before it's byteswapped
|
||||||
|
if (auto [ok, as] = match_expr(rt, byteswap(match<u8[16]>())); ok)
|
||||||
|
{
|
||||||
|
m_block->block_end = m_ir->GetInsertBlock();
|
||||||
|
const auto cond = eval(extract(bitcast<u32[4]>(as), 0) != 0);
|
||||||
|
const auto addr = eval(extract(get_vr(op.ra), 3) & 0x3fffc);
|
||||||
|
const auto target = add_block_indirect(op, addr);
|
||||||
|
m_ir->CreateCondBr(cond.value, target, add_block_next());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Check sign bit instead (optimization)
|
// Check sign bit instead (optimization)
|
||||||
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
|
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
|
||||||
{
|
{
|
||||||
|
@ -9483,6 +9512,21 @@ public:
|
||||||
|
|
||||||
const u32 target = spu_branch_target(m_pos, op.i16);
|
const u32 target = spu_branch_target(m_pos, op.i16);
|
||||||
|
|
||||||
|
const auto rt = get_vr<u8[16]>(op.rt);
|
||||||
|
|
||||||
|
// Checking for zero doesn't care about the order of the bytes,
|
||||||
|
// so load the data before it's byteswapped
|
||||||
|
if (auto [ok, as] = match_expr(rt, byteswap(match<u8[16]>())); ok)
|
||||||
|
{
|
||||||
|
if (target != m_pos + 4)
|
||||||
|
{
|
||||||
|
m_block->block_end = m_ir->GetInsertBlock();
|
||||||
|
const auto cond = eval(extract(bitcast<u32[4]>(as), 0) == 0);
|
||||||
|
m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check sign bit instead (optimization)
|
// Check sign bit instead (optimization)
|
||||||
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
|
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
|
||||||
{
|
{
|
||||||
|
@ -9527,6 +9571,21 @@ public:
|
||||||
|
|
||||||
const u32 target = spu_branch_target(m_pos, op.i16);
|
const u32 target = spu_branch_target(m_pos, op.i16);
|
||||||
|
|
||||||
|
const auto rt = get_vr<u8[16]>(op.rt);
|
||||||
|
|
||||||
|
// Checking for zero doesn't care about the order of the bytes,
|
||||||
|
// so load the data before it's byteswapped
|
||||||
|
if (auto [ok, as] = match_expr(rt, byteswap(match<u8[16]>())); ok)
|
||||||
|
{
|
||||||
|
if (target != m_pos + 4)
|
||||||
|
{
|
||||||
|
m_block->block_end = m_ir->GetInsertBlock();
|
||||||
|
const auto cond = eval(extract(bitcast<u32[4]>(as), 0) != 0);
|
||||||
|
m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check sign bit instead (optimization)
|
// Check sign bit instead (optimization)
|
||||||
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
|
if (match_vr<s32[4], s64[2]>(op.rt, [&](auto c, auto MP)
|
||||||
{
|
{
|
||||||
|
@ -9583,7 +9642,6 @@ public:
|
||||||
m_block->block_end = m_ir->GetInsertBlock();
|
m_block->block_end = m_ir->GetInsertBlock();
|
||||||
const auto a = get_vr<s8[16]>(op.rt);
|
const auto a = get_vr<s8[16]>(op.rt);
|
||||||
const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) == 0);
|
const auto cond = eval((bitcast<s16>(trunc<bool[16]>(a)) & 0x3000) == 0);
|
||||||
//const auto cond = eval((m & 0x3000) == 0);
|
|
||||||
m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
|
m_ir->CreateCondBr(cond.value, add_block(target), add_block(m_pos + 4));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue