mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-13 10:18:40 +12:00
PPU Analyser: compile certain functions on per-instruction basis
PPU LLVM: optimize small blocks
This commit is contained in:
parent
891ebd0cb1
commit
8a029159cd
2 changed files with 101 additions and 16 deletions
|
@ -633,7 +633,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
||||||
{
|
{
|
||||||
if (!_seg.addr) continue;
|
if (!_seg.addr) continue;
|
||||||
|
|
||||||
if (value >= _seg.addr && value < _seg.addr + _seg.size)
|
if (value >= start && value < end)
|
||||||
{
|
{
|
||||||
addr_heap.emplace(value);
|
addr_heap.emplace(value);
|
||||||
break;
|
break;
|
||||||
|
@ -1527,6 +1527,25 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
||||||
// Decompose functions to basic blocks
|
// Decompose functions to basic blocks
|
||||||
for (auto&& [_, func] : as_rvalue(std::move(fmap)))
|
for (auto&& [_, func] : as_rvalue(std::move(fmap)))
|
||||||
{
|
{
|
||||||
|
if (func.attr & ppu_attr::no_size && entry)
|
||||||
|
{
|
||||||
|
// Disabled for PRX for now
|
||||||
|
const u32 lim = get_limit(func.addr);
|
||||||
|
|
||||||
|
ppu_log.warning("Function 0x%x will be compiled on per-instruction basis (next=0x%x)", func.addr, lim);
|
||||||
|
|
||||||
|
for (u32 addr = func.addr; addr < lim; addr += 4)
|
||||||
|
{
|
||||||
|
auto& block = fmap[addr];
|
||||||
|
block.addr = addr;
|
||||||
|
block.size = 4;
|
||||||
|
block.toc = func.toc;
|
||||||
|
block.attr = ppu_attr::no_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
for (auto [addr, size] : func.blocks)
|
for (auto [addr, size] : func.blocks)
|
||||||
{
|
{
|
||||||
if (!size)
|
if (!size)
|
||||||
|
@ -1583,7 +1602,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
||||||
case 109:
|
case 109:
|
||||||
case 110:
|
case 110:
|
||||||
{
|
{
|
||||||
ppu_log.notice("Added block from reloc: 0x%x (0x%x, %u)", target, rel.addr, rel.type);
|
ppu_log.trace("Added block from reloc: 0x%x (0x%x, %u) (heap=%d)", target, rel.addr, rel.type, addr_heap.count(target));
|
||||||
block_queue.emplace_back(target, 0);
|
block_queue.emplace_back(target, 0);
|
||||||
block_set.emplace(target);
|
block_set.emplace(target);
|
||||||
continue;
|
continue;
|
||||||
|
@ -1598,8 +1617,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
||||||
u32 exp = start;
|
u32 exp = start;
|
||||||
u32 lim = end;
|
u32 lim = end;
|
||||||
|
|
||||||
// Start with full scan
|
// Start with full scan (disabled for PRX for now)
|
||||||
|
if (entry)
|
||||||
|
{
|
||||||
block_queue.emplace_back(exp, lim);
|
block_queue.emplace_back(exp, lim);
|
||||||
|
}
|
||||||
|
|
||||||
// block_queue may grow
|
// block_queue may grow
|
||||||
for (usz i = 0; i < block_queue.size(); i++)
|
for (usz i = 0; i < block_queue.size(); i++)
|
||||||
|
@ -1731,6 +1753,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
||||||
block.addr = exp;
|
block.addr = exp;
|
||||||
block.size = i_pos - exp;
|
block.size = i_pos - exp;
|
||||||
ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);
|
ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);
|
||||||
|
|
||||||
|
if (get_limit(exp) == end)
|
||||||
|
{
|
||||||
|
block.attr += ppu_attr::no_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1750,9 +1777,26 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert map to vector (destructive)
|
// Convert map to vector (destructive)
|
||||||
for (auto&& pair : as_rvalue(std::move(fmap)))
|
for (auto&& [_, block] : as_rvalue(std::move(fmap)))
|
||||||
{
|
{
|
||||||
funcs.emplace_back(std::move(pair.second));
|
if (block.attr & ppu_attr::no_size && block.size > 4 && entry)
|
||||||
|
{
|
||||||
|
// Disabled for PRX for now
|
||||||
|
ppu_log.warning("Block 0x%x will be compiled on per-instruction basis (size=0x%x)", block.addr, block.size);
|
||||||
|
|
||||||
|
for (u32 addr = block.addr; addr < block.addr + block.size; addr += 4)
|
||||||
|
{
|
||||||
|
auto& i = funcs.emplace_back();
|
||||||
|
i.addr = addr;
|
||||||
|
i.size = 4;
|
||||||
|
i.toc = block.toc;
|
||||||
|
i.attr = ppu_attr::no_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
funcs.emplace_back(std::move(block));
|
||||||
}
|
}
|
||||||
|
|
||||||
ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());
|
ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
const ppu_decoder<PPUTranslator> s_ppu_decoder;
|
const ppu_decoder<PPUTranslator> s_ppu_decoder;
|
||||||
|
const ppu_decoder<ppu_itype> s_ppu_itype;
|
||||||
const ppu_decoder<ppu_iname> s_ppu_iname;
|
const ppu_decoder<ppu_iname> s_ppu_iname;
|
||||||
|
|
||||||
PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_module& info, ExecutionEngine& engine)
|
PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_module& info, ExecutionEngine& engine)
|
||||||
|
@ -161,11 +162,45 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||||
const u64 base = m_reloc ? m_reloc->addr : 0;
|
const u64 base = m_reloc ? m_reloc->addr : 0;
|
||||||
m_addr = info.addr - base;
|
m_addr = info.addr - base;
|
||||||
|
|
||||||
|
// Don't emit check in small blocks without terminator
|
||||||
|
bool need_check = info.size >= 16;
|
||||||
|
|
||||||
|
for (u32 addr = m_addr; addr < m_addr + info.size; addr += 4)
|
||||||
|
{
|
||||||
|
const u32 op = vm::read32(vm::cast(addr + base));
|
||||||
|
|
||||||
|
switch (s_ppu_itype.decode(op))
|
||||||
|
{
|
||||||
|
case ppu_itype::UNK:
|
||||||
|
case ppu_itype::ECIWX:
|
||||||
|
case ppu_itype::ECOWX:
|
||||||
|
case ppu_itype::TD:
|
||||||
|
case ppu_itype::TDI:
|
||||||
|
case ppu_itype::TW:
|
||||||
|
case ppu_itype::TWI:
|
||||||
|
case ppu_itype::B:
|
||||||
|
case ppu_itype::BC:
|
||||||
|
case ppu_itype::BCCTR:
|
||||||
|
case ppu_itype::BCLR:
|
||||||
|
case ppu_itype::SC:
|
||||||
|
{
|
||||||
|
need_check = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
m_thread = &*m_function->arg_begin();
|
m_thread = &*m_function->arg_begin();
|
||||||
m_base_loaded = m_ir->CreateLoad(m_base);
|
m_base_loaded = m_ir->CreateLoad(m_base);
|
||||||
|
|
||||||
const auto body = BasicBlock::Create(m_context, "__body", m_function);
|
const auto body = BasicBlock::Create(m_context, "__body", m_function);
|
||||||
|
|
||||||
|
if (need_check)
|
||||||
|
{
|
||||||
// Check status register in the entry block
|
// Check status register in the entry block
|
||||||
const auto vstate = m_ir->CreateLoad(m_ir->CreateStructGEP(nullptr, m_thread, 1), true);
|
const auto vstate = m_ir->CreateLoad(m_ir->CreateStructGEP(nullptr, m_thread, 1), true);
|
||||||
const auto vcheck = BasicBlock::Create(m_context, "__test", m_function);
|
const auto vcheck = BasicBlock::Create(m_context, "__test", m_function);
|
||||||
|
@ -175,6 +210,12 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||||
m_ir->SetInsertPoint(vcheck);
|
m_ir->SetInsertPoint(vcheck);
|
||||||
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCallKind(llvm::CallInst::TCK_Tail);
|
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCallKind(llvm::CallInst::TCK_Tail);
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_ir->CreateBr(body);
|
||||||
|
}
|
||||||
|
|
||||||
m_ir->SetInsertPoint(body);
|
m_ir->SetInsertPoint(body);
|
||||||
|
|
||||||
// Process blocks
|
// Process blocks
|
||||||
|
@ -2990,7 +3031,7 @@ void PPUTranslator::EQV(ppu_opcode_t op)
|
||||||
|
|
||||||
void PPUTranslator::ECIWX(ppu_opcode_t op)
|
void PPUTranslator::ECIWX(ppu_opcode_t op)
|
||||||
{
|
{
|
||||||
SetGpr(op.rd, Call(GetType<u64>(), "__eciwx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)));
|
UNK(op);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::LHZUX(ppu_opcode_t op)
|
void PPUTranslator::LHZUX(ppu_opcode_t op)
|
||||||
|
@ -3111,7 +3152,7 @@ void PPUTranslator::ORC(ppu_opcode_t op)
|
||||||
|
|
||||||
void PPUTranslator::ECOWX(ppu_opcode_t op)
|
void PPUTranslator::ECOWX(ppu_opcode_t op)
|
||||||
{
|
{
|
||||||
Call(GetType<void>(), "__ecowx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32));
|
UNK(op);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::STHUX(ppu_opcode_t op)
|
void PPUTranslator::STHUX(ppu_opcode_t op)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue