PPU/LLVM: Simplify trace management

This commit is contained in:
Vincent Lejeune 2015-08-23 17:50:16 +02:00 committed by Nekotekina
parent f2c8db75bf
commit 02a1bffc12
2 changed files with 159 additions and 528 deletions

View file

@ -75,7 +75,7 @@ Compiler::~Compiler() {
delete m_llvm_context; delete m_llvm_context;
} }
std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits) { std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::string & name, u32 start_address, u32 instruction_count, bool generate_linkable_exits) {
auto compilation_start = std::chrono::high_resolution_clock::now(); auto compilation_start = std::chrono::high_resolution_clock::now();
m_module = new llvm::Module("Module", *m_llvm_context); m_module = new llvm::Module("Module", *m_llvm_context);
@ -118,7 +118,6 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
fpm->add(createCFGSimplificationPass()); fpm->add(createCFGSimplificationPass());
fpm->doInitialization(); fpm->doInitialization();
m_state.cfg = &cfg;
m_state.generate_linkable_exits = generate_linkable_exits; m_state.generate_linkable_exits = generate_linkable_exits;
// Create the function // Create the function
@ -132,34 +131,32 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
// Create the entry block and add code to branch to the first instruction // Create the entry block and add code to branch to the first instruction
m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0)); m_ir_builder->SetInsertPoint(GetBasicBlockFromAddress(0));
m_ir_builder->CreateBr(GetBasicBlockFromAddress(cfg.start_address)); m_ir_builder->CreateBr(GetBasicBlockFromAddress(start_address));
// Used to decode instructions // Used to decode instructions
PPUDisAsm dis_asm(CPUDisAsm_DumpMode); PPUDisAsm dis_asm(CPUDisAsm_DumpMode);
dis_asm.offset = vm::get_ptr<u8>(cfg.start_address); dis_asm.offset = vm::get_ptr<u8>(start_address);
m_recompilation_engine.Log() << "Recompiling block :\n\n"; m_recompilation_engine.Log() << "Recompiling block :\n\n";
// Convert each instruction in the CFG to LLVM IR // Convert each instruction in the CFG to LLVM IR
std::vector<PHINode *> exit_instr_list; std::vector<PHINode *> exit_instr_list;
for (u32 instr_i : cfg.instruction_addresses) { for (u32 instructionAddress = start_address; instructionAddress < start_address + instruction_count * 4; instructionAddress += 4) {
m_state.hit_branch_instruction = false; m_state.hit_branch_instruction = false;
m_state.current_instruction_address = instr_i; m_state.current_instruction_address = instructionAddress;
BasicBlock *instr_bb = GetBasicBlockFromAddress(m_state.current_instruction_address); BasicBlock *instr_bb = GetBasicBlockFromAddress(instructionAddress);
m_ir_builder->SetInsertPoint(instr_bb); m_ir_builder->SetInsertPoint(instr_bb);
if (instr_bb->empty()) { u32 instr = vm::ps3::read32(instructionAddress);
u32 instr = vm::ps3::read32(m_state.current_instruction_address);
// Dump PPU opcode // Dump PPU opcode
dis_asm.dump_pc = m_state.current_instruction_address * 4; dis_asm.dump_pc = instructionAddress;
(*PPU_instr::main_list)(&dis_asm, instr); (*PPU_instr::main_list)(&dis_asm, instr);
m_recompilation_engine.Log() << dis_asm.last_opcode; m_recompilation_engine.Log() << dis_asm.last_opcode;
Decode(instr); Decode(instr);
if (!m_state.hit_branch_instruction) if (!m_state.hit_branch_instruction)
m_ir_builder->CreateBr(GetBasicBlockFromAddress(m_state.current_instruction_address + 4)); m_ir_builder->CreateBr(GetBasicBlockFromAddress(instructionAddress + 4));
}
} }
// Generate exit logic for all empty blocks // Generate exit logic for all empty blocks
@ -179,7 +176,7 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
if (generate_linkable_exits) { if (generate_linkable_exits) {
Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty());
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); context_i64 = m_ir_builder->CreateOr(context_i64, (u64)start_address << 32);
Value *ret_i32 = IndirectCall(m_state.current_instruction_address, context_i64, false); Value *ret_i32 = IndirectCall(m_state.current_instruction_address, context_i64, false);
Value *cmp_i1 = m_ir_builder->CreateICmpNE(ret_i32, m_ir_builder->getInt32(0)); Value *cmp_i1 = m_ir_builder->CreateICmpNE(ret_i32, m_ir_builder->getInt32(0));
BasicBlock *then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_0"); BasicBlock *then_bb = GetBasicBlockFromAddress(m_state.current_instruction_address, "then_0");
@ -188,7 +185,7 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
m_ir_builder->SetInsertPoint(then_bb); m_ir_builder->SetInsertPoint(then_bb);
context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty()); context_i64 = m_ir_builder->CreateZExt(ret_i32, m_ir_builder->getInt64Ty());
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); context_i64 = m_ir_builder->CreateOr(context_i64, (u64)start_address << 32);
m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64);
m_ir_builder->CreateBr(merge_bb); m_ir_builder->CreateBr(merge_bb);
@ -215,7 +212,7 @@ std::pair<Executable, llvm::ExecutionEngine *> Compiler::Compile(const std::stri
m_ir_builder->SetInsertPoint(then_bb); m_ir_builder->SetInsertPoint(then_bb);
Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty()); Value *context_i64 = m_ir_builder->CreateZExt(exit_instr_i32, m_ir_builder->getInt64Ty());
context_i64 = m_ir_builder->CreateOr(context_i64, (u64)cfg.function_address << 32); context_i64 = m_ir_builder->CreateOr(context_i64, (u64)start_address << 32);
m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64); m_ir_builder->CreateCall2(m_execute_unknown_block, m_state.args[CompileTaskState::Args::State], context_i64);
m_ir_builder->CreateBr(merge_bb); m_ir_builder->CreateBr(merge_bb);
@ -353,10 +350,10 @@ const Executable RecompilationEngine::GetCompiledExecutableIfAvailable(u32 addre
return std::get<0>(It->second); return std::get<0>(It->second);
} }
void RecompilationEngine::NotifyTrace(ExecutionTrace * execution_trace) { void RecompilationEngine::NotifyBlockStart(u32 address) {
{ {
std::lock_guard<std::mutex> lock(m_pending_execution_traces_lock); std::lock_guard<std::mutex> lock(m_pending_address_start_lock);
m_pending_execution_traces.push_back(execution_trace); m_pending_address_start.push_back(address);
} }
if (!joinable()) { if (!joinable()) {
@ -384,22 +381,16 @@ void RecompilationEngine::Task() {
auto start = std::chrono::high_resolution_clock::now(); auto start = std::chrono::high_resolution_clock::now();
while (joinable() && !Emu.IsStopped()) { while (joinable() && !Emu.IsStopped()) {
bool work_done_this_iteration = false; bool work_done_this_iteration = false;
ExecutionTrace * execution_trace = nullptr; std::list <u32> m_current_execution_traces;
{ {
std::lock_guard<std::mutex> lock(m_pending_execution_traces_lock); std::lock_guard<std::mutex> lock(m_pending_address_start_lock);
m_current_execution_traces.swap(m_pending_address_start);
auto i = m_pending_execution_traces.begin();
if (i != m_pending_execution_traces.end()) {
execution_trace = *i;
m_pending_execution_traces.erase(i);
}
} }
if (execution_trace) { if (!m_current_execution_traces.empty()) {
ProcessExecutionTrace(*execution_trace); for (u32 address : m_current_execution_traces)
work_done_this_iteration = true; work_done_this_iteration |= ProcessExecutionTrace(address);
delete execution_trace;
} }
if (!work_done_this_iteration) { if (!work_done_this_iteration) {
@ -429,122 +420,116 @@ void RecompilationEngine::Task() {
s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this. s_the_instance = nullptr; // Can cause deadlock if this is the last instance. Need to fix this.
} }
// NOTE(review): this span comes from a side-by-side diff rendering: every line holds the pre-change text followed by the post-change text.
// Post-change ProcessExecutionTrace(u32 address): finds the BlockEntry for `address` in m_block_table, creating it when absent.
// While the block is not compiled its num_hits is incremented; once num_hits reaches Ini.LLVMThreshold, CompileBlock is called
// and true is returned. In every other case the function returns false.
void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) { bool RecompilationEngine::ProcessExecutionTrace(u32 address) {
auto execution_trace_id = execution_trace.GetId(); auto It = m_block_table.find(address);
auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id); if (It == m_block_table.end())
if (processed_execution_trace_i == m_processed_execution_traces.end()) { It = m_block_table.emplace(address, BlockEntry(address)).first;
Log() << "Trace: " << execution_trace.ToString() << "\n"; BlockEntry &block = It->second;
// Find the function block if (!block.is_compiled) {
BlockEntry key(execution_trace.function_address, execution_trace.function_address); block.num_hits++;
auto block_i = m_block_table.find(&key); if (block.num_hits >= Ini.LLVMThreshold.GetValue()) {
if (block_i == m_block_table.end()) { CompileBlock(block);
block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address)); return true;
}
auto function_block = *block_i;
block_i = m_block_table.end();
auto split_trace = false;
std::vector<BlockEntry *> tmp_block_list;
for (auto trace_i = execution_trace.entries.begin(); trace_i != execution_trace.entries.end(); trace_i++) {
if (trace_i->type == ExecutionTraceEntry::Type::CompiledBlock) {
block_i = m_block_table.end();
split_trace = true;
}
if (block_i == m_block_table.end()) {
BlockEntry key(trace_i->GetPrimaryAddress(), execution_trace.function_address);
block_i = m_block_table.find(&key);
if (block_i == m_block_table.end()) {
block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address));
}
tmp_block_list.push_back(*block_i);
}
const ExecutionTraceEntry * next_trace = nullptr;
if (trace_i + 1 != execution_trace.entries.end()) {
next_trace = &(*(trace_i + 1));
}
else if (!split_trace && execution_trace.type == ExecutionTrace::Type::Loop) {
next_trace = &(*(execution_trace.entries.begin()));
}
UpdateControlFlowGraph((*block_i)->cfg, *trace_i, next_trace);
if (*block_i != function_block) {
UpdateControlFlowGraph(function_block->cfg, *trace_i, next_trace);
} }
} }
return false;
processed_execution_trace_i = m_processed_execution_traces.insert(m_processed_execution_traces.end(), std::make_pair(execution_trace_id, std::move(tmp_block_list)));
}
for (auto i = processed_execution_trace_i->second.begin(); i != processed_execution_trace_i->second.end(); i++) {
if (!(*i)->is_compiled) {
(*i)->num_hits++;
if ((*i)->num_hits >= Ini.LLVMThreshold.GetValue()) {
CompileBlock(*(*i));
}
}
}
// TODO:: Syphurith: It is said that just remove_if would cause some troubles.. I don't know if that would cause Memleak. From CppCheck:
// The return value of std::remove_if() is ignored. This function returns an iterator to the end of the range containing those elements that should be kept.
// Elements past new end remain valid but with unspecified values. Use the erase method of the container to delete them.
std::remove_if(processed_execution_trace_i->second.begin(), processed_execution_trace_i->second.end(), [](const BlockEntry * b)->bool { return b->is_compiled; });
} }
// NOTE(review): this span comes from a side-by-side diff rendering: every line holds the pre-change text followed by the post-change text.
// Post-change AnalyseBlock(functionData, maxSize): scans 4-byte instructions starting at functionData.address for at most
// maxSize bytes, counting each one in functionData.instructionCount and recording the farthest forward target of non-linking branches.
// - A BLR located at or beyond that farthest target ends the scan with is_compilable_function still true.
// - An instruction whose GD_13 field equals BCCTR with LK clear, a non-linking B whose target lies before the start address,
//   or reaching maxSize ends the scan with is_compilable_function set to false.
// - Targets of linking B instructions are collected in functionData.calledFunctions.
// Every exit path returns true.
void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry) { /**
if (this_entry.type == ExecutionTraceEntry::Type::Instruction) { * This code is inspired from Dolphin PPC Analyst
cfg.instruction_addresses.insert(this_entry.GetPrimaryAddress()); */
inline s32 SignExt16(s16 x) { return (s32)(s16)x; }
inline s32 SignExt26(u32 x) { return x & 0x2000000 ? (s32)(x | 0xFC000000) : (s32)(x); }
if (next_entry) {
if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { bool RecompilationEngine::AnalyseBlock(BlockEntry &functionData, size_t maxSize)
if (next_entry->GetPrimaryAddress() != (this_entry.GetPrimaryAddress() + 4)) { {
cfg.branches[this_entry.GetPrimaryAddress()].insert(next_entry->GetPrimaryAddress()); u32 startAddress = functionData.address;
u32 farthestBranchTarget = startAddress;
functionData.instructionCount = 0;
functionData.calledFunctions.clear();
functionData.is_analysed = true;
functionData.is_compilable_function = true;
Log() << "Analysing " << (void*)(uint64_t)startAddress << "\n";
for (size_t instructionAddress = startAddress; instructionAddress < startAddress + maxSize; instructionAddress += 4)
{
u32 instr = vm::ps3::read32((u32)instructionAddress);
functionData.instructionCount++;
if (instr == PPU_instr::implicts::BLR() && instructionAddress >= farthestBranchTarget && functionData.is_compilable_function)
{
Log() << "Analysis: Block is compilable into a function \n";
return true;
}
else if (PPU_instr::fields::GD_13(instr) == PPU_opcodes::G_13Opcodes::BCCTR)
{
if (!PPU_instr::fields::LK(instr))
{
Log() << "Analysis: indirect branching found \n";
functionData.is_compilable_function = false;
return true;
} }
} }
else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { else if (PPU_instr::fields::OPCD(instr) == PPU_opcodes::PPU_MainOpcodes::BC)
cfg.calls[this_entry.data.instruction.address].insert(next_entry->GetPrimaryAddress()); {
} u32 target = SignExt16(PPU_instr::fields::BD(instr));
} if (!PPU_instr::fields::AA(instr)) // AA clear: the displacement is relative to this instruction's address
} target += (u32)instructionAddress;
else if (this_entry.type == ExecutionTraceEntry::Type::CompiledBlock) { if (target > farthestBranchTarget && !PPU_instr::fields::LK(instr))
if (next_entry) { farthestBranchTarget = target;
if (next_entry->type == ExecutionTraceEntry::Type::Instruction || next_entry->type == ExecutionTraceEntry::Type::CompiledBlock) { }
cfg.branches[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); else if (PPU_instr::fields::OPCD(instr) == PPU_opcodes::PPU_MainOpcodes::B)
} {
else if (next_entry->type == ExecutionTraceEntry::Type::FunctionCall) { u32 target = SignExt26(PPU_instr::fields::LL(instr));
cfg.calls[this_entry.data.compiled_block.exit_address].insert(next_entry->GetPrimaryAddress()); if (!PPU_instr::fields::AA(instr)) // AA clear: the displacement is relative to this instruction's address
} target += (u32)instructionAddress;
if (!PPU_instr::fields::LK(instr))
{
if (target < startAddress)
{
Log() << "Analysis: branch to previous block\n";
functionData.is_compilable_function = false;
return true;
}
else if (target > farthestBranchTarget)
farthestBranchTarget = target;
}
else
functionData.calledFunctions.insert(target);
} }
} }
Log() << "Analysis: maxSize reached \n";
functionData.is_compilable_function = false;
return true;
} }
// NOTE(review): this span comes from a side-by-side diff rendering: every line holds the pre-change text followed by the post-change text.
// Post-change CompileBlock: returns immediately when block_entry.is_analysed is already set; otherwise it runs AnalyseBlock,
// compiles block_entry.instructionCount instructions starting at block_entry.address (without linkable exits), stores the
// executable, its ExecutionEngine and the current m_currentId in m_address_to_function, mirrors the executable into
// FunctionCache and marks the block as compiled.
// NOTE(review): AnalyseBlock is called below with one argument, while the definition visible in this file takes
// (BlockEntry &, size_t maxSize) - presumably the declaration supplies a default for maxSize; confirm.
// NOTE(review): every return statement of the visible AnalyseBlock yields true, so the `!AnalyseBlock(...)` early-out looks unreachable; confirm intent.
void RecompilationEngine::CompileBlock(BlockEntry & block_entry) { void RecompilationEngine::CompileBlock(BlockEntry & block_entry) {
Log() << "Compile: " << block_entry.ToString() << "\n"; if (block_entry.is_analysed)
Log() << "CFG: " << block_entry.cfg.ToString() << "\n"; return;
if (!AnalyseBlock(block_entry))
return;
Log() << "Compile: " << block_entry.ToString() << "\n";
const std::pair<Executable, llvm::ExecutionEngine *> &compileResult = const std::pair<Executable, llvm::ExecutionEngine *> &compileResult =
m_compiler.Compile(fmt::format("fn_0x%08X", block_entry.cfg.start_address), block_entry.cfg, m_compiler.Compile(fmt::format("fn_0x%08X", block_entry.address), block_entry.address, block_entry.instructionCount, false /*generate_linkable_exits*/);
block_entry.IsFunction() ? true : false /*generate_linkable_exits*/);
// If entry doesn't exist, create it (using lock) // If entry doesn't exist, create it (using lock)
std::unordered_map<u32, ExecutableStorage>::iterator It = m_address_to_function.find(block_entry.cfg.start_address); std::unordered_map<u32, ExecutableStorage>::iterator It = m_address_to_function.find(block_entry.address);
if (It == m_address_to_function.end()) if (It == m_address_to_function.end())
{ {
std::lock_guard<std::mutex> lock(m_address_to_function_lock); std::lock_guard<std::mutex> lock(m_address_to_function_lock);
std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = nullptr; std::get<1>(m_address_to_function[block_entry.address]) = nullptr;
if (!isAddressCommited(block_entry.cfg.start_address / 4)) if (!isAddressCommited(block_entry.address / 4))
commitAddress(block_entry.cfg.start_address / 4); commitAddress(block_entry.address / 4);
} }
std::get<1>(m_address_to_function[block_entry.cfg.start_address]) = std::unique_ptr<llvm::ExecutionEngine>(compileResult.second); std::get<1>(m_address_to_function[block_entry.address]) = std::unique_ptr<llvm::ExecutionEngine>(compileResult.second);
std::get<0>(m_address_to_function[block_entry.cfg.start_address]) = compileResult.first; std::get<0>(m_address_to_function[block_entry.address]) = compileResult.first;
std::get<3>(m_address_to_function[block_entry.cfg.start_address]) = m_currentId; std::get<3>(m_address_to_function[block_entry.address]) = m_currentId;
Log() << "ID IS " << m_currentId << "\n"; Log() << "Associating " << (void*)(uint64_t)block_entry.address << " with ID " << m_currentId << "\n";
m_currentId++; m_currentId++;
block_entry.last_compiled_cfg_size = block_entry.cfg.GetSize();
block_entry.is_compiled = true; block_entry.is_compiled = true;
FunctionCache[block_entry.cfg.start_address / 4] = compileResult.first; FunctionCache[block_entry.address / 4] = compileResult.first;
} }
std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() { std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() {
@ -557,86 +542,6 @@ std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() {
return s_the_instance; return s_the_instance;
} }
/// Attaches the tracer to the recompilation engine instance and pre-sizes the trace stack.
Tracer::Tracer()
    : m_recompilation_engine(RecompilationEngine::GetInstance())
{
    // Reserve capacity for 100 stack entries up front.
    const size_t initial_stack_capacity = 100;
    m_stack.reserve(initial_stack_capacity);
}
/// Destructor: delegates all shutdown work to Terminate().
Tracer::~Tracer()
{
    this->Terminate();
}
/// Feeds one execution event into the tracer.
///
/// The meaning of arg1 and arg2 depends on trace_type and is described next to
/// each event below. When an event closes a trace (a function return, or an
/// instruction address already present in the current trace, i.e. a loop), that
/// trace is passed to RecompilationEngine::NotifyTrace.
void Tracer::Trace(TraceType trace_type, u32 arg1, u32 arg2) {
    // Trace completed by this event, if any.
    ExecutionTrace * finished_trace = nullptr;

    if (trace_type == TraceType::CallFunction) {
        // arg1: address of the called function.
        m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::FunctionCall, arg1));
    }
    else if (trace_type == TraceType::EnterFunction) {
        // arg1: address of the function being entered; it gets a trace of its own.
        m_stack.push_back(new ExecutionTrace(arg1));
    }
    else if (trace_type == TraceType::ExitFromCompiledFunction) {
        // arg1: address of the function.
        // arg2: address of the exit instruction; nothing is recorded when it is 0.
        if (arg2) {
            m_stack.push_back(new ExecutionTrace(arg1));
            m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2));
        }
    }
    else if (trace_type == TraceType::Return) {
        // No arguments used: the innermost trace ends here as a linear trace.
        finished_trace = m_stack.back();
        finished_trace->type = ExecutionTrace::Type::Linear;
        m_stack.pop_back();
    }
    else if (trace_type == TraceType::Instruction) {
        // arg1: address of the instruction.
        auto & entries = m_stack.back()->entries;

        // True when an already recorded entry starts at the address in arg1.
        const auto begins_at_arg1 = [arg1](const ExecutionTraceEntry & entry) -> bool {
            const bool same_instruction =
                entry.type == ExecutionTraceEntry::Type::Instruction && entry.data.instruction.address == arg1;
            const bool same_block_entry =
                entry.type == ExecutionTraceEntry::Type::CompiledBlock && entry.data.compiled_block.entry_address == arg1;
            return same_instruction || same_block_entry;
        };

        // Search from the most recent entry backwards.
        for (int i = static_cast<int>(entries.size()) - 1; i >= 0; --i) {
            if (!begins_at_arg1(entries[i])) {
                continue;
            }

            // Found a loop: entries [i, end) become a trace of their own, and the
            // current trace keeps everything up to and including entry i.
            finished_trace = new ExecutionTrace(m_stack.back()->function_address);
            finished_trace->type = ExecutionTrace::Type::Loop;
            finished_trace->entries.insert(finished_trace->entries.end(), entries.begin() + i, entries.end());
            entries.erase(entries.begin() + i + 1, entries.end());
            break;
        }

        if (!finished_trace) {
            // A loop was not found
            entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::Instruction, arg1));
        }
    }
    else if (trace_type == TraceType::ExitFromCompiledBlock) {
        // arg1: address of the compiled block.
        // arg2: address of the exit instruction.
        m_stack.back()->entries.push_back(ExecutionTraceEntry(ExecutionTraceEntry::Type::CompiledBlock, arg1, arg2));

        if (arg2 == 0) {
            // Return from function
            finished_trace = m_stack.back();
            finished_trace->type = ExecutionTrace::Type::Linear;
            m_stack.pop_back();
        }
    }
    else {
        assert(0);
    }

    if (finished_trace) {
        m_recompilation_engine->NotifyTrace(finished_trace);
    }
}
/// Shutdown hook called from the destructor. Currently does nothing.
void Tracer::Terminate()
{
    // TODO: Notify recompilation engine
}
ppu_recompiler_llvm::CPUHybridDecoderRecompiler::CPUHybridDecoderRecompiler(PPUThread & ppu) ppu_recompiler_llvm::CPUHybridDecoderRecompiler::CPUHybridDecoderRecompiler(PPUThread & ppu)
: m_ppu(ppu) : m_ppu(ppu)
, m_interpreter(new PPUInterpreter(ppu)) , m_interpreter(new PPUInterpreter(ppu))
@ -656,7 +561,6 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::DecodeMemory(const u32 addr
// NOTE(review): this span comes from a side-by-side diff rendering: every line holds the pre-change text followed by the post-change text.
// Post-change ExecuteFunction: delegates to ExecuteTillReturn with a context of 0 and returns its result;
// the `context` argument is not read here.
u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteFunction(PPUThread * ppu_state, u64 context) { u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteFunction(PPUThread * ppu_state, u64 context) {
auto execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder(); auto execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder();
execution_engine->m_tracer.Trace(Tracer::TraceType::EnterFunction, ppu_state->PC, 0);
return ExecuteTillReturn(ppu_state, 0); return ExecuteTillReturn(ppu_state, 0);
} }
@ -687,8 +591,7 @@ static BranchType GetBranchTypeFromInstruction(u32 instruction)
u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread * ppu_state, u64 context) { u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread * ppu_state, u64 context) {
CPUHybridDecoderRecompiler *execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder(); CPUHybridDecoderRecompiler *execution_engine = (CPUHybridDecoderRecompiler *)ppu_state->GetDecoder();
if (context) execution_engine->m_recompilation_engine->NotifyBlockStart(ppu_state->PC);
execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledFunction, context >> 32, context & 0xFFFFFFFF);
while (PollStatus(ppu_state) == false) { while (PollStatus(ppu_state) == false) {
const Executable executable = execution_engine->m_recompilation_engine->GetCompiledExecutableIfAvailable(ppu_state->PC); const Executable executable = execution_engine->m_recompilation_engine->GetCompiledExecutableIfAvailable(ppu_state->PC);
@ -696,12 +599,11 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread
{ {
auto entry = ppu_state->PC; auto entry = ppu_state->PC;
u32 exit = (u32)executable(ppu_state, 0); u32 exit = (u32)executable(ppu_state, 0);
execution_engine->m_tracer.Trace(Tracer::TraceType::ExitFromCompiledBlock, entry, exit);
if (exit == 0) if (exit == 0)
return 0; return 0;
execution_engine->m_recompilation_engine->NotifyBlockStart(ppu_state->PC);
continue; continue;
} }
execution_engine->m_tracer.Trace(Tracer::TraceType::Instruction, ppu_state->PC, 0);
u32 instruction = vm::ps3::read32(ppu_state->PC); u32 instruction = vm::ps3::read32(ppu_state->PC);
u32 oldPC = ppu_state->PC; u32 oldPC = ppu_state->PC;
execution_engine->m_decoder.Decode(instruction); execution_engine->m_decoder.Decode(instruction);
@ -710,11 +612,9 @@ u32 ppu_recompiler_llvm::CPUHybridDecoderRecompiler::ExecuteTillReturn(PPUThread
switch (branch_type) { switch (branch_type) {
case BranchType::Return: case BranchType::Return:
execution_engine->m_tracer.Trace(Tracer::TraceType::Return, 0, 0);
if (Emu.GetCPUThreadStop() == ppu_state->PC) ppu_state->fast_stop(); if (Emu.GetCPUThreadStop() == ppu_state->PC) ppu_state->fast_stop();
return 0; return 0;
case BranchType::FunctionCall: { case BranchType::FunctionCall: {
execution_engine->m_tracer.Trace(Tracer::TraceType::CallFunction, ppu_state->PC, 0);
ExecuteFunction(ppu_state, 0); ExecuteFunction(ppu_state, 0);
break; break;
} }

View file

@ -26,233 +26,9 @@
namespace ppu_recompiler_llvm { namespace ppu_recompiler_llvm {
class Compiler; class Compiler;
class RecompilationEngine; class RecompilationEngine;
class Tracer;
class ExecutionEngine; class ExecutionEngine;
struct PPUState; struct PPUState;
/// One element of an execution trace: an executed instruction, a function call,
/// or a run through an already compiled block.
struct ExecutionTraceEntry {
    /// Payload of the entry; which member is valid is selected by `type`.
    union {
        struct Instruction {
            u32 address;
        } instruction;

        struct FunctionCall {
            u32 address;
        } function_call;

        struct CompiledBlock {
            u32 entry_address;
            u32 exit_address;
        } compiled_block;
    } data;

    /// Discriminator for `data`
    enum class Type {
        FunctionCall,
        Instruction,
        CompiledBlock,
    } type;

    /// Builds an entry of the given type.
    /// arg1 is the (entry) address; arg2 is only used for CompiledBlock, as its exit address.
    ExecutionTraceEntry(Type type, u32 arg1, u32 arg2 = 0)
        : type(type) {
        if (type == Type::Instruction) {
            data.instruction.address = arg1;
        }
        else if (type == Type::FunctionCall) {
            data.function_call.address = arg1;
        }
        else if (type == Type::CompiledBlock) {
            data.compiled_block.entry_address = arg1;
            data.compiled_block.exit_address = arg2;
        }
        else {
            assert(0);
        }
    }

    /// Address identifying the entry: the instruction address, the called
    /// function address, or the entry address of the compiled block.
    u32 GetPrimaryAddress() const {
        if (type == Type::Instruction) {
            return data.instruction.address;
        }
        if (type == Type::FunctionCall) {
            return data.function_call.address;
        }
        if (type == Type::CompiledBlock) {
            return data.compiled_block.entry_address;
        }

        assert(0);
        return 0;
    }

    /// Short textual form: "I:<addr>", "F:<addr>" or "C:<entry>-<exit>".
    std::string ToString() const {
        if (type == Type::Instruction) {
            return fmt::format("I:0x%08X", data.instruction.address);
        }
        if (type == Type::FunctionCall) {
            return fmt::format("F:0x%08X", data.function_call.address);
        }
        if (type == Type::CompiledBlock) {
            return fmt::format("C:0x%08X-0x%08X", data.compiled_block.entry_address, data.compiled_block.exit_address);
        }

        assert(0);
        return "";
    }

    /// 64-bit value combining the entry's type and address(es).
    /// For a compiled block the upper half holds the exit address rather than the type.
    u64 hash() const {
        const u64 type_bits = (u64)type << 32;

        if (type == Type::Instruction) {
            return type_bits | data.instruction.address;
        }
        if (type == Type::FunctionCall) {
            return type_bits | data.function_call.address;
        }
        if (type == Type::CompiledBlock) {
            u64 packed = data.compiled_block.exit_address;
            packed <<= 32;
            packed |= data.compiled_block.entry_address;
            return packed;
        }

        assert(0);
        return type_bits;
    }
};
/// An execution trace.
struct ExecutionTrace {
/// Unique id of an execution trace;
typedef u64 Id;
/// The function to which this trace belongs
u32 function_address;
/// Execution trace type
enum class Type {
Linear,
Loop,
} type;
/// entries in the trace
std::vector<ExecutionTraceEntry> entries;
ExecutionTrace(u32 address)
: function_address(address) {
}
std::string ToString() const {
auto s = fmt::format("0x%08X %s ->", function_address, type == ExecutionTrace::Type::Loop ? "Loop" : "Linear");
for (auto i = 0; i < entries.size(); i++) {
s += " " + entries[i].ToString();
}
return s;
}
Id GetId() const {
Id id = 0;
for (auto i = entries.begin(); i != entries.end(); i++) {
id ^= i->hash();
id <<= 1;
}
return id;
}
};
/// A control flow graph
struct ControlFlowGraph {
/// Address of the first instruction
u32 start_address;
/// Address of the function to which this CFG belongs to
u32 function_address;
/// Set of addresses of the instructions in the CFG
std::set<u32> instruction_addresses;
/// Branches in the CFG.
/// Key is the address of an instruction
/// Data is the set of all instructions to which this instruction branches to.
std::map<u32, std::set<u32>> branches;
/// Function calls in the CFG
/// Key is the address of an instruction
/// Data is the set of all functions which this instruction invokes.
std::map<u32, std::set<u32>> calls;
ControlFlowGraph(u32 start_address, u32 function_address)
: start_address(start_address)
, function_address(function_address) {
}
void operator += (const ControlFlowGraph & other) {
for (auto i = other.instruction_addresses.begin(); i != other.instruction_addresses.end(); i++) {
instruction_addresses.insert(*i);
}
for (auto i = other.branches.begin(); i != other.branches.end(); i++) {
auto j = branches.find(i->first);
if (j == branches.end()) {
j = branches.insert(branches.begin(), std::make_pair(i->first, std::set<u32>()));
}
for (auto k = i->second.begin(); k != i->second.end(); k++) {
j->second.insert(*k);
}
}
for (auto i = other.calls.begin(); i != other.calls.end(); i++) {
auto j = calls.find(i->first);
if (j == calls.end()) {
j = calls.insert(calls.begin(), std::make_pair(i->first, std::set<u32>()));
}
for (auto k = i->second.begin(); k != i->second.end(); k++) {
j->second.insert(*k);
}
}
}
std::string ToString() const {
auto s = fmt::format("0x%08X (0x%08X): Size=%u ->", start_address, function_address, GetSize());
for (auto i = instruction_addresses.begin(); i != instruction_addresses.end(); i++) {
s += fmt::format(" 0x%08X", *i);
}
s += "\nBranches:";
for (auto i = branches.begin(); i != branches.end(); i++) {
s += fmt::format("\n0x%08X ->", i->first);
for (auto j = i->second.begin(); j != i->second.end(); j++) {
s += fmt::format(" 0x%08X", *j);
}
}
s += "\nCalls:";
for (auto i = calls.begin(); i != calls.end(); i++) {
s += fmt::format("\n0x%08X ->", i->first);
for (auto j = i->second.begin(); j != i->second.end(); j++) {
s += fmt::format(" 0x%08X", *j);
}
}
return s;
}
/// Get the size of the CFG. The size is a score of how large the CFG is and increases everytime
/// a node or an edge is added to the CFG.
size_t GetSize() const {
return instruction_addresses.size() + branches.size() + calls.size();
}
};
enum class BranchType { enum class BranchType {
NonBranch, NonBranch,
LocalBranch, LocalBranch,
@ -295,7 +71,7 @@ namespace ppu_recompiler_llvm {
* Compile a code fragment described by a cfg and return an executable and the ExecutionEngine storing it * Compile a code fragment described by a cfg and return an executable and the ExecutionEngine storing it
* Pointer to function can be retrieved with getPointerToFunction * Pointer to function can be retrieved with getPointerToFunction
*/ */
std::pair<Executable, llvm::ExecutionEngine *> Compile(const std::string & name, const ControlFlowGraph & cfg, bool generate_linkable_exits); std::pair<Executable, llvm::ExecutionEngine *> Compile(const std::string & name, u32 start_address, u32 instruction_count, bool generate_linkable_exits);
/// Retrieve compiler stats /// Retrieve compiler stats
Stats GetStats(); Stats GetStats();
@ -723,9 +499,6 @@ namespace ppu_recompiler_llvm {
/// Args of the LLVM function /// Args of the LLVM function
llvm::Value * args[MaxArgs]; llvm::Value * args[MaxArgs];
/// The CFG being compiled
const ControlFlowGraph * cfg;
/// Address of the current instruction being compiled /// Address of the current instruction being compiled
u32 current_instruction_address; u32 current_instruction_address;
@ -1021,8 +794,8 @@ namespace ppu_recompiler_llvm {
**/ **/
const Executable GetCompiledExecutableIfAvailable(u32 address); const Executable GetCompiledExecutableIfAvailable(u32 address);
/// Notify the recompilation engine about a newly detected trace. It takes ownership of the trace. /// Notify the recompilation engine about a newly detected block start.
void NotifyTrace(ExecutionTrace * execution_trace); void NotifyBlockStart(u32 address);
/// Log /// Log
llvm::raw_fd_ostream & Log(); llvm::raw_fd_ostream & Log();
@ -1035,65 +808,58 @@ namespace ppu_recompiler_llvm {
private: private:
/// An entry in the block table /// An entry in the block table
struct BlockEntry { struct BlockEntry {
/// Start address
u32 address;
/// Number of times this block was hit /// Number of times this block was hit
u32 num_hits; u32 num_hits;
/// Size of the CFG when it was last compiled /// Indicates whether this function has been analysed or not
size_t last_compiled_cfg_size; bool is_analysed;
/// The CFG for this block
ControlFlowGraph cfg;
/// Indicates whether the block has been compiled or not /// Indicates whether the block has been compiled or not
bool is_compiled; bool is_compiled;
BlockEntry(u32 start_address, u32 function_address) /// Indicate wheter the block is a function that can be completly compiled
/// that is, that has a clear "return" semantic and no indirect branch
bool is_compilable_function;
/// If the analysis was successful, how long the block is.
u32 instructionCount;
/// If the analysis was successful, which functions it calls.
std::set<u32> calledFunctions;
BlockEntry(u32 start_address)
: num_hits(0) : num_hits(0)
, last_compiled_cfg_size(0) , address(start_address)
, is_compiled(false) , is_compiled(false)
, cfg(start_address, function_address) { , is_analysed(false)
, is_compilable_function(false)
, instructionCount(0) {
} }
std::string ToString() const { std::string ToString() const {
return fmt::format("0x%08X (0x%08X): NumHits=%u, LastCompiledCfgSize=%u, IsCompiled=%c", return fmt::format("0x%08X: NumHits=%u, IsCompiled=%c",
cfg.start_address, cfg.function_address, num_hits, last_compiled_cfg_size, is_compiled ? 'Y' : 'N'); address, num_hits, is_compiled ? 'Y' : 'N');
} }
bool operator == (const BlockEntry & other) const { bool operator == (const BlockEntry & other) const {
return cfg.start_address == other.cfg.start_address; return address == other.address;
} }
bool IsFunction() const {
return cfg.function_address == cfg.start_address;
}
struct hash {
size_t operator()(const BlockEntry * e) const {
return e->cfg.start_address;
}
};
struct equal_to {
bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const {
return *lhs == *rhs;
}
};
}; };
/// Log /// Log
llvm::raw_fd_ostream * m_log; llvm::raw_fd_ostream * m_log;
/// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue. /// Lock for accessing m_pending_address_start. TODO: Eliminate this and use a lock-free queue.
std::mutex m_pending_execution_traces_lock; std::mutex m_pending_address_start_lock;
/// Queue of execution traces pending processing /// Queue of block start address to process
std::list<ExecutionTrace *> m_pending_execution_traces; std::list<u32> m_pending_address_start;
/// Block table /// Block table
std::unordered_set<BlockEntry *, BlockEntry::hash, BlockEntry::equal_to> m_block_table; std::unordered_map<u32, BlockEntry> m_block_table;
/// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes.
std::unordered_map<ExecutionTrace::Id, std::vector<BlockEntry *>> m_processed_execution_traces;
/// Lock for accessing m_address_to_function. /// Lock for accessing m_address_to_function.
std::mutex m_address_to_function_lock; std::mutex m_address_to_function_lock;
@ -1131,10 +897,15 @@ namespace ppu_recompiler_llvm {
RecompilationEngine & operator = (RecompilationEngine && other) = delete; RecompilationEngine & operator = (RecompilationEngine && other) = delete;
/// Process an execution trace. /// Process an execution trace.
void ProcessExecutionTrace(const ExecutionTrace & execution_trace); /// Returns true if a block was compiled
bool ProcessExecutionTrace(u32);
/// Update a CFG /**
void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry); * Analyse block to get useful info (function called, has indirect branch...)
* This code is inspired from Dolphin PPC Analyst
* Return true if analysis is successful.
*/
bool AnalyseBlock(BlockEntry &functionData, size_t maxSize = 10000);
/// Compile a block /// Compile a block
void CompileBlock(BlockEntry & block_entry); void CompileBlock(BlockEntry & block_entry);
@ -1146,43 +917,6 @@ namespace ppu_recompiler_llvm {
static std::shared_ptr<RecompilationEngine> s_the_instance; static std::shared_ptr<RecompilationEngine> s_the_instance;
}; };
/// Finds interesting execution sequences
class Tracer {
public:
/// Trace type
enum class TraceType : u32 {
CallFunction,
EnterFunction,
ExitFromCompiledFunction,
Return,
Instruction,
ExitFromCompiledBlock,
};
Tracer();
Tracer(const Tracer & other) = delete;
Tracer(Tracer && other) = delete;
virtual ~Tracer();
Tracer & operator = (const Tracer & other) = delete;
Tracer & operator = (Tracer && other) = delete;
/// Notify the tracer
void Trace(TraceType trace_type, u32 arg1, u32 arg2);
/// Notify the tracer that the execution sequence is being terminated.
void Terminate();
private:
/// Call stack
std::vector<ExecutionTrace *> m_stack;
/// Recompilation engine
std::shared_ptr<RecompilationEngine> m_recompilation_engine;
};
/** /**
* PPU execution engine * PPU execution engine
* Relies on PPUInterpreter1 to execute uncompiled code. * Relies on PPUInterpreter1 to execute uncompiled code.
@ -1216,9 +950,6 @@ namespace ppu_recompiler_llvm {
/// PPU instruction Decoder /// PPU instruction Decoder
PPUDecoder m_decoder; PPUDecoder m_decoder;
/// Execution tracer
Tracer m_tracer;
/// Recompilation engine /// Recompilation engine
std::shared_ptr<RecompilationEngine> m_recompilation_engine; std::shared_ptr<RecompilationEngine> m_recompilation_engine;