mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-09 16:31:28 +12:00
SPU LLVM: improve constant propagation
Propagate constants in non-volatile registers between chunks. Disable function table in Mega mode.
This commit is contained in:
parent
a424fcfcf7
commit
d3ad44aec4
1 changed file with 75 additions and 25 deletions
|
@ -1553,9 +1553,28 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
std::array<llvm::StoreInst*, s_reg_max> store{};
|
std::array<llvm::StoreInst*, s_reg_max> store{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Per-chunk record stored in m_functions: the LLVM function for a chunk plus
// any register values known to be constant when the chunk is entered.
struct chunk_info
|
||||||
|
{
|
||||||
|
// Callable LLVM function for this chunk (handed to set_function and to the
// pass manager by the code that consumes m_functions).
|
||||||
|
llvm::Function* func;
|
||||||
|

||||||
|
// Constants in non-volatile registers at the entry point.
// A null entry means "no constant known"; readers fall back to loading the
// register. Any non-null entry keeps the chunk out of the function table.
|
||||||
|
std::array<llvm::Value*, s_reg_max> reg{};
|
||||||
|

||||||
|
// NOTE(review): func has no default member initializer, so with this defaulted
// constructor it is only null under value-initialization - confirm callers.
chunk_info() = default;
|
||||||
|

||||||
|
// Wrap an already created function; reg keeps its all-null default.
chunk_info(llvm::Function* func)
|
||||||
|
: func(func)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Current block
|
// Current block
|
||||||
block_info* m_block;
|
block_info* m_block;
|
||||||
|
|
||||||
|
// Current chunk
|
||||||
|
chunk_info* m_finfo;
|
||||||
|
|
||||||
// All blocks in the current function chunk
|
// All blocks in the current function chunk
|
||||||
std::unordered_map<u32, block_info, value_hash<u32, 2>> m_blocks;
|
std::unordered_map<u32, block_info, value_hash<u32, 2>> m_blocks;
|
||||||
|
|
||||||
|
@ -1563,7 +1582,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
std::vector<u32> m_block_queue;
|
std::vector<u32> m_block_queue;
|
||||||
|
|
||||||
// All function chunks in current SPU compile unit
|
// All function chunks in current SPU compile unit
|
||||||
std::unordered_map<u32, llvm::Function*, value_hash<u32, 2>> m_functions;
|
std::unordered_map<u32, chunk_info, value_hash<u32, 2>> m_functions;
|
||||||
|
|
||||||
// Function chunk list for processing
|
// Function chunk list for processing
|
||||||
std::vector<u32> m_function_queue;
|
std::vector<u32> m_function_queue;
|
||||||
|
@ -1584,9 +1603,28 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
result->addAttribute(2, llvm::Attribute::NoAlias);
|
result->addAttribute(2, llvm::Attribute::NoAlias);
|
||||||
|
|
||||||
// Enqueue if necessary
|
// Enqueue if necessary
|
||||||
if (m_functions.emplace(addr, result).second)
|
const auto empl = m_functions.emplace(addr, chunk_info{result});
|
||||||
|
|
||||||
|
if (empl.second)
|
||||||
{
|
{
|
||||||
m_function_queue.push_back(addr);
|
m_function_queue.push_back(addr);
|
||||||
|
|
||||||
|
if (m_block && g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
||||||
|
{
|
||||||
|
// Initialize constants for non-volatile registers (TODO)
|
||||||
|
auto& regs = empl.first->second.reg;
|
||||||
|
|
||||||
|
for (u32 i = 80; i <= 127; i++)
|
||||||
|
{
|
||||||
|
if (auto c = llvm::dyn_cast_or_null<llvm::Constant>(m_block->reg[i]))
|
||||||
|
{
|
||||||
|
if (!(find_reg_origin(addr, i, false) >> 31))
|
||||||
|
{
|
||||||
|
regs[i] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
@ -1600,6 +1638,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
|
|
||||||
m_reg_addr.fill(nullptr);
|
m_reg_addr.fill(nullptr);
|
||||||
m_block = nullptr;
|
m_block = nullptr;
|
||||||
|
m_finfo = nullptr;
|
||||||
m_blocks.clear();
|
m_blocks.clear();
|
||||||
m_block_queue.clear();
|
m_block_queue.clear();
|
||||||
m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", m_function));
|
m_ir->SetInsertPoint(llvm::BasicBlock::Create(m_context, "", m_function));
|
||||||
|
@ -1769,12 +1808,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return either basic block addr with single dominating value, or negative number of PHI entries
|
// Return either basic block addr with single dominating value, or negative number of PHI entries
|
||||||
u32 find_reg_origin(u32 addr, u32 index)
|
u32 find_reg_origin(u32 addr, u32 index, bool chunk_only = true)
|
||||||
{
|
{
|
||||||
u32 result = -1;
|
u32 result = -1;
|
||||||
|
|
||||||
// Handle entry point specially
|
// Handle entry point specially
|
||||||
if (m_entry_info[addr / 4])
|
if (chunk_only && m_entry_info[addr / 4])
|
||||||
{
|
{
|
||||||
result = addr;
|
result = addr;
|
||||||
}
|
}
|
||||||
|
@ -1791,10 +1830,12 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
{
|
{
|
||||||
for (u32 pred : pfound->second)
|
for (u32 pred : pfound->second)
|
||||||
{
|
{
|
||||||
if (m_entry_map[pred / 4] == root)
|
if (chunk_only && m_entry_map[pred / 4] != root)
|
||||||
{
|
{
|
||||||
m_scan_queue.push_back(pred);
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_scan_queue.push_back(pred);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1831,7 +1872,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
// Enqueue predecessors if register is not modified there
|
// Enqueue predecessors if register is not modified there
|
||||||
for (u32 pred : pfound->second)
|
for (u32 pred : pfound->second)
|
||||||
{
|
{
|
||||||
if (m_entry_map[pred / 4] != root)
|
if (chunk_only && m_entry_map[pred / 4] != root)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -1847,7 +1888,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (regmod || m_entry_info[addr / 4])
|
if (regmod || (chunk_only && m_entry_info[addr / 4]))
|
||||||
{
|
{
|
||||||
if (result == -1)
|
if (result == -1)
|
||||||
{
|
{
|
||||||
|
@ -2200,7 +2241,8 @@ public:
|
||||||
{
|
{
|
||||||
// Initialize function info
|
// Initialize function info
|
||||||
m_entry = m_function_queue[fi];
|
m_entry = m_function_queue[fi];
|
||||||
set_function(m_functions[m_entry]);
|
set_function(m_functions[m_entry].func);
|
||||||
|
m_finfo = &m_functions[m_entry];
|
||||||
m_ir->CreateBr(add_block(m_entry));
|
m_ir->CreateBr(add_block(m_entry));
|
||||||
|
|
||||||
// Emit instructions for basic blocks
|
// Emit instructions for basic blocks
|
||||||
|
@ -2251,9 +2293,10 @@ public:
|
||||||
if (!value)
|
if (!value)
|
||||||
{
|
{
|
||||||
// Value hasn't been loaded yet
|
// Value hasn't been loaded yet
|
||||||
value = m_ir->CreateLoad(regptr);
|
value = m_finfo->reg[i] ? m_finfo->reg[i] : m_ir->CreateLoad(regptr);
|
||||||
}
|
}
|
||||||
else if (i < 128 && llvm::isa<llvm::Constant>(value))
|
|
||||||
|
if (i < 128 && llvm::isa<llvm::Constant>(value))
|
||||||
{
|
{
|
||||||
// Bitcast the constant
|
// Bitcast the constant
|
||||||
value = make_const_vector(get_const_vector(llvm::cast<llvm::Constant>(value), baddr, i), _phi->getType());
|
value = make_const_vector(get_const_vector(llvm::cast<llvm::Constant>(value), baddr, i), _phi->getType());
|
||||||
|
@ -2279,7 +2322,7 @@ public:
|
||||||
const auto regptr = init_vr(i);
|
const auto regptr = init_vr(i);
|
||||||
const auto cblock = m_ir->GetInsertBlock();
|
const auto cblock = m_ir->GetInsertBlock();
|
||||||
m_ir->SetInsertPoint(m_function->getEntryBlock().getTerminator());
|
m_ir->SetInsertPoint(m_function->getEntryBlock().getTerminator());
|
||||||
const auto value = m_ir->CreateLoad(regptr);
|
const auto value = m_finfo->reg[i] ? m_finfo->reg[i] : m_ir->CreateLoad(regptr);
|
||||||
m_ir->SetInsertPoint(cblock);
|
m_ir->SetInsertPoint(cblock);
|
||||||
_phi->addIncoming(value, &m_function->getEntryBlock());
|
_phi->addIncoming(value, &m_function->getEntryBlock());
|
||||||
}
|
}
|
||||||
|
@ -2295,6 +2338,11 @@ public:
|
||||||
m_block->reg[i] = bfound->second.reg[i];
|
m_block->reg[i] = bfound->second.reg[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (baddr == m_entry)
|
||||||
|
{
|
||||||
|
// Passthrough constant from a different chunk
|
||||||
|
m_block->reg[i] = m_finfo->reg[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Emit state check if necessary (TODO: more conditions)
|
// Emit state check if necessary (TODO: more conditions)
|
||||||
|
@ -2380,10 +2428,19 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
chunks.push_back(null);
|
chunks.push_back(null);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
chunks.push_back(found->second.func);
|
||||||
|
|
||||||
|
// If a chunk has incoming constants, we can't add it to the function table (TODO)
|
||||||
|
for (const auto c : found->second.reg)
|
||||||
{
|
{
|
||||||
chunks.push_back(found->second);
|
if (c != nullptr)
|
||||||
|
{
|
||||||
|
chunks.back() = null;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2406,7 +2463,7 @@ public:
|
||||||
|
|
||||||
for (const auto& func : m_functions)
|
for (const auto& func : m_functions)
|
||||||
{
|
{
|
||||||
pm.run(*func.second);
|
pm.run(*func.second.func);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear context (TODO)
|
// Clear context (TODO)
|
||||||
|
@ -4498,18 +4555,11 @@ public:
|
||||||
|
|
||||||
llvm::Value* ptr = m_ir->CreateGEP(disp, m_ir->CreateLShr(ad64, 2, "", true));
|
llvm::Value* ptr = m_ir->CreateGEP(disp, m_ir->CreateLShr(ad64, 2, "", true));
|
||||||
|
|
||||||
if (g_cfg.core.spu_block_size != spu_block_size_type::safe)
|
if (g_cfg.core.spu_block_size == spu_block_size_type::giga)
|
||||||
{
|
{
|
||||||
// Try to load chunk address from the function table
|
// Try to load chunk address from the function table
|
||||||
llvm::Value* index = ad64;
|
const auto use_ftable = m_ir->CreateICmpULT(ad64, m_ir->getInt64(m_size));
|
||||||
|
ptr = m_ir->CreateSelect(use_ftable, m_ir->CreateGEP(m_function_table, {m_ir->getInt64(0), m_ir->CreateLShr(ad64, 2, "", true)}), ptr);
|
||||||
if (g_cfg.core.spu_block_size != spu_block_size_type::giga)
|
|
||||||
{
|
|
||||||
index = m_ir->CreateSub(ad64, m_ir->getInt64(m_function_queue[0]));
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto use_ftable = m_ir->CreateICmpULT(index, m_ir->getInt64(m_size));
|
|
||||||
ptr = m_ir->CreateSelect(use_ftable, m_ir->CreateGEP(m_function_table, {m_ir->getInt64(0), m_ir->CreateLShr(index, 2, "", true)}), ptr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tail(m_ir->CreateLoad(ptr));
|
tail(m_ir->CreateLoad(ptr));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue