mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-04 05:51:27 +12:00
Utilize idle time to combine blocks
This commit is contained in:
parent
71c12360c7
commit
c12a98510c
2 changed files with 177 additions and 88 deletions
|
@ -4908,41 +4908,70 @@ raw_fd_ostream & RecompilationEngine::Log() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RecompilationEngine::Task() {
|
void RecompilationEngine::Task() {
|
||||||
|
bool work_done_this_iteration = false;
|
||||||
|
bool work_done_last_iteration = false;
|
||||||
std::chrono::nanoseconds idling_time(0);
|
std::chrono::nanoseconds idling_time(0);
|
||||||
|
std::chrono::nanoseconds recompiling_time(0);
|
||||||
|
|
||||||
auto start = std::chrono::high_resolution_clock::now();
|
auto start = std::chrono::high_resolution_clock::now();
|
||||||
while (!TestDestroy() && !Emu.IsStopped()) {
|
while (!TestDestroy() && !Emu.IsStopped()) {
|
||||||
// Wait a few ms for something to happen
|
work_done_last_iteration = work_done_this_iteration;
|
||||||
auto idling_start = std::chrono::high_resolution_clock::now();
|
work_done_this_iteration = false;
|
||||||
WaitForAnySignal(250);
|
ExecutionTrace * execution_trace = nullptr;
|
||||||
auto idling_end = std::chrono::high_resolution_clock::now();
|
|
||||||
idling_time += std::chrono::duration_cast<std::chrono::nanoseconds>(idling_end - idling_start);
|
|
||||||
|
|
||||||
u32 num_processed = 0;
|
{
|
||||||
while (!TestDestroy() && !Emu.IsStopped()) {
|
std::lock_guard<std::mutex> lock(m_pending_execution_traces_lock);
|
||||||
ExecutionTrace * execution_trace;
|
|
||||||
|
|
||||||
{
|
auto i = m_pending_execution_traces.begin();
|
||||||
std::lock_guard<std::mutex> lock(m_pending_execution_traces_lock);
|
if (i != m_pending_execution_traces.end()) {
|
||||||
|
execution_trace = *i;
|
||||||
|
m_pending_execution_traces.erase(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
auto i = m_pending_execution_traces.begin();
|
if (execution_trace) {
|
||||||
if (i != m_pending_execution_traces.end()) {
|
ProcessExecutionTrace(*execution_trace);
|
||||||
execution_trace = *i;
|
delete execution_trace;
|
||||||
m_pending_execution_traces.erase(i);
|
work_done_this_iteration = true;
|
||||||
} else {
|
}
|
||||||
break;
|
|
||||||
|
if (!work_done_this_iteration) {
|
||||||
|
// TODO: Reduce the priority of the recompilation engine thread if it's set to high priority
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!work_done_this_iteration && !work_done_last_iteration) {
|
||||||
|
auto recompiling_start = std::chrono::high_resolution_clock::now();
|
||||||
|
|
||||||
|
// Recompile the function with the most number of compiled fragments
|
||||||
|
auto candidate = m_function_table.end();
|
||||||
|
for (auto function_i = m_function_table.begin(); function_i != m_function_table.end(); function_i++) {
|
||||||
|
if ((*function_i)->num_compiled_fragments && (*function_i)->blocks.front()->IsFunction() && (*function_i)->blocks.front()->is_compiled) {
|
||||||
|
if (candidate != m_function_table.end()) {
|
||||||
|
if ((*function_i)->num_compiled_fragments > (*candidate)->num_compiled_fragments) {
|
||||||
|
candidate = function_i;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
candidate = function_i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ProcessExecutionTrace(*execution_trace);
|
if (candidate != m_function_table.end()) {
|
||||||
delete execution_trace;
|
Log() << "Recompiling: " << (*candidate)->ToString() << "\n";
|
||||||
|
CompileBlock(*(*candidate), *((*candidate)->blocks.front()));
|
||||||
|
work_done_this_iteration = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto recompiling_end = std::chrono::high_resolution_clock::now();
|
||||||
|
recompiling_time += std::chrono::duration_cast<std::chrono::nanoseconds>(recompiling_end - recompiling_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Reduce the priority of the recompilation engine thread
|
if (!work_done_this_iteration) {
|
||||||
|
// Wait a few ms for something to happen
|
||||||
if (num_processed == 0) {
|
auto idling_start = std::chrono::high_resolution_clock::now();
|
||||||
// If we get here, it means the recompilation engine is idling.
|
WaitForAnySignal(250);
|
||||||
// We should use this opportunity to optimize the code.
|
auto idling_end = std::chrono::high_resolution_clock::now();
|
||||||
|
idling_time += std::chrono::duration_cast<std::chrono::nanoseconds>(idling_end - idling_start);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4955,6 +4984,7 @@ void RecompilationEngine::Task() {
|
||||||
Log() << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n";
|
Log() << " Time spent building IR = " << compiler_stats.ir_build_time.count() / 1000000 << "ms\n";
|
||||||
Log() << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n";
|
Log() << " Time spent optimizing = " << compiler_stats.optimization_time.count() / 1000000 << "ms\n";
|
||||||
Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n";
|
Log() << " Time spent translating = " << compiler_stats.translation_time.count() / 1000000 << "ms\n";
|
||||||
|
Log() << " Time spent recompiling = " << recompiling_time.count() / 1000000 << "ms\n";
|
||||||
Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n";
|
Log() << " Time spent idling = " << idling_time.count() / 1000000 << "ms\n";
|
||||||
Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n";
|
Log() << " Time spent doing misc tasks = " << (total_time.count() - idling_time.count() - compiler_stats.total_time.count()) / 1000000 << "ms\n";
|
||||||
Log() << "Ordinals allocated = " << m_next_ordinal << "\n";
|
Log() << "Ordinals allocated = " << m_next_ordinal << "\n";
|
||||||
|
@ -4968,6 +4998,8 @@ void RecompilationEngine::Task() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) {
|
void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution_trace) {
|
||||||
|
auto function_i = m_function_table.end();
|
||||||
|
|
||||||
auto execution_trace_id = execution_trace.GetId();
|
auto execution_trace_id = execution_trace.GetId();
|
||||||
auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id);
|
auto processed_execution_trace_i = m_processed_execution_traces.find(execution_trace_id);
|
||||||
if (processed_execution_trace_i == m_processed_execution_traces.end()) {
|
if (processed_execution_trace_i == m_processed_execution_traces.end()) {
|
||||||
|
@ -4992,16 +5024,16 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution
|
||||||
if (block_i == m_block_table.end()) {
|
if (block_i == m_block_table.end()) {
|
||||||
block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address));
|
block_i = m_block_table.insert(m_block_table.end(), new BlockEntry(key.cfg.start_address, key.cfg.function_address));
|
||||||
|
|
||||||
// Update the function to block map
|
if (function_i == m_function_table.end()) {
|
||||||
auto function_to_block_i = m_function_to_blocks.find(execution_trace.function_address);
|
FunctionEntry key(execution_trace.function_address);
|
||||||
if (function_to_block_i == m_function_to_blocks.end()) {
|
function_i = m_function_table.find(&key);
|
||||||
function_to_block_i = m_function_to_blocks.insert(m_function_to_blocks.end(), std::make_pair(execution_trace.function_address, std::vector<BlockEntry *>()));
|
if (function_i == m_function_table.end()) {
|
||||||
|
function_i = m_function_table.insert(m_function_table.end(), new FunctionEntry(key.address));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto i = std::find(function_to_block_i->second.begin(), function_to_block_i->second.end(), *block_i);
|
// Update the function table
|
||||||
if (i == function_to_block_i->second.end()) {
|
(*function_i)->AddBlock(*block_i);
|
||||||
function_to_block_i->second.push_back(*block_i);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp_block_list.push_back(*block_i);
|
tmp_block_list.push_back(*block_i);
|
||||||
|
@ -5024,7 +5056,12 @@ void RecompilationEngine::ProcessExecutionTrace(const ExecutionTrace & execution
|
||||||
if (!(*i)->is_compiled) {
|
if (!(*i)->is_compiled) {
|
||||||
(*i)->num_hits++;
|
(*i)->num_hits++;
|
||||||
if ((*i)->num_hits >= 1000) { // TODO: Make this configurable
|
if ((*i)->num_hits >= 1000) { // TODO: Make this configurable
|
||||||
CompileBlock(*(*i));
|
if (function_i == m_function_table.end()) {
|
||||||
|
FunctionEntry key(execution_trace.function_address);
|
||||||
|
function_i = m_function_table.find(&key);
|
||||||
|
}
|
||||||
|
|
||||||
|
CompileBlock(*(*function_i), *(*i));
|
||||||
(*i)->is_compiled = true;
|
(*i)->is_compiled = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5057,17 +5094,16 @@ void RecompilationEngine::UpdateControlFlowGraph(ControlFlowGraph & cfg, const E
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RecompilationEngine::CompileBlock(BlockEntry & block_entry) {
|
void RecompilationEngine::CompileBlock(FunctionEntry & function_entry, BlockEntry & block_entry) {
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
Log() << "Compile: " << block_entry.ToString() << "\n";
|
Log() << "Compile: " << block_entry.ToString() << "\n";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ControlFlowGraph * cfg;
|
|
||||||
ControlFlowGraph temp_cfg(block_entry.cfg.start_address, block_entry.cfg.function_address);
|
ControlFlowGraph temp_cfg(block_entry.cfg.start_address, block_entry.cfg.function_address);
|
||||||
|
ControlFlowGraph * cfg;
|
||||||
if (block_entry.IsFunction()) {
|
if (block_entry.IsFunction()) {
|
||||||
// Form a CFG by merging all the blocks in this function
|
// Form a CFG by merging all the blocks in this function
|
||||||
auto function_to_block_i = m_function_to_blocks.find(block_entry.cfg.function_address);
|
for (auto block_i = function_entry.blocks.begin(); block_i != function_entry.blocks.end(); block_i++) {
|
||||||
for (auto block_i = function_to_block_i->second.begin(); block_i != function_to_block_i->second.end(); block_i++) {
|
|
||||||
temp_cfg += (*block_i)->cfg;
|
temp_cfg += (*block_i)->cfg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5084,6 +5120,12 @@ void RecompilationEngine::CompileBlock(BlockEntry & block_entry) {
|
||||||
auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), *cfg, true,
|
auto executable = m_compiler.Compile(fmt::Format("fn_0x%08X_%u", block_entry.cfg.start_address, block_entry.revision++), *cfg, true,
|
||||||
block_entry.IsFunction() ? true : false /*generate_linkable_exits*/);
|
block_entry.IsFunction() ? true : false /*generate_linkable_exits*/);
|
||||||
m_executable_lookup[ordinal] = executable;
|
m_executable_lookup[ordinal] = executable;
|
||||||
|
|
||||||
|
if (block_entry.IsFunction()) {
|
||||||
|
function_entry.num_compiled_fragments = 0;
|
||||||
|
} else {
|
||||||
|
function_entry.num_compiled_fragments++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() {
|
std::shared_ptr<RecompilationEngine> RecompilationEngine::GetInstance() {
|
||||||
|
|
|
@ -246,55 +246,6 @@ namespace ppu_recompiler_llvm {
|
||||||
/// Pointer to an executable
|
/// Pointer to an executable
|
||||||
typedef u32(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context);
|
typedef u32(*Executable)(PPUThread * ppu_state, PPUInterpreter * interpreter, u64 context);
|
||||||
|
|
||||||
/// An entry in the block table
|
|
||||||
struct BlockEntry {
|
|
||||||
/// Number of times this block was hit
|
|
||||||
u32 num_hits;
|
|
||||||
|
|
||||||
/// The current revision number of this function
|
|
||||||
u32 revision;
|
|
||||||
|
|
||||||
/// The CFG for this block
|
|
||||||
ControlFlowGraph cfg;
|
|
||||||
|
|
||||||
/// Indicates whether the block has been compiled or not
|
|
||||||
bool is_compiled;
|
|
||||||
|
|
||||||
BlockEntry(u32 start_address, u32 function_address)
|
|
||||||
: num_hits(0)
|
|
||||||
, revision(0)
|
|
||||||
, is_compiled(false)
|
|
||||||
, cfg(start_address, function_address) {
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string ToString() const {
|
|
||||||
return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, IsCompiled=%c",
|
|
||||||
cfg.start_address, cfg.function_address, num_hits, revision, is_compiled ? 'Y' : 'N');
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator == (const BlockEntry & other) const {
|
|
||||||
return cfg.start_address == other.cfg.start_address;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsFunction() const {
|
|
||||||
return cfg.function_address == cfg.start_address;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct hash {
|
|
||||||
size_t operator()(const BlockEntry * e) const {
|
|
||||||
return e->cfg.start_address;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct equal_to {
|
|
||||||
bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const {
|
|
||||||
return *lhs == *rhs;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace ppu_recompiler_llvm {
|
|
||||||
/// PPU compiler that uses LLVM for code generation and optimization
|
/// PPU compiler that uses LLVM for code generation and optimization
|
||||||
class Compiler : protected PPUOpcodes, protected PPCDecoder {
|
class Compiler : protected PPUOpcodes, protected PPCDecoder {
|
||||||
public:
|
public:
|
||||||
|
@ -1008,6 +959,102 @@ namespace ppu_recompiler_llvm {
|
||||||
static std::shared_ptr<RecompilationEngine> GetInstance();
|
static std::shared_ptr<RecompilationEngine> GetInstance();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/// An entry in the block table
|
||||||
|
struct BlockEntry {
|
||||||
|
/// Number of times this block was hit
|
||||||
|
u32 num_hits;
|
||||||
|
|
||||||
|
/// The current revision number of this function
|
||||||
|
u32 revision;
|
||||||
|
|
||||||
|
/// The CFG for this block
|
||||||
|
ControlFlowGraph cfg;
|
||||||
|
|
||||||
|
/// Indicates whether the block has been compiled or not
|
||||||
|
bool is_compiled;
|
||||||
|
|
||||||
|
BlockEntry(u32 start_address, u32 function_address)
|
||||||
|
: num_hits(0)
|
||||||
|
, revision(0)
|
||||||
|
, is_compiled(false)
|
||||||
|
, cfg(start_address, function_address) {
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string ToString() const {
|
||||||
|
return fmt::Format("0x%08X (0x%08X): NumHits=%u, Revision=%u, IsCompiled=%c",
|
||||||
|
cfg.start_address, cfg.function_address, num_hits, revision, is_compiled ? 'Y' : 'N');
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator == (const BlockEntry & other) const {
|
||||||
|
return cfg.start_address == other.cfg.start_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsFunction() const {
|
||||||
|
return cfg.function_address == cfg.start_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct hash {
|
||||||
|
size_t operator()(const BlockEntry * e) const {
|
||||||
|
return e->cfg.start_address;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct equal_to {
|
||||||
|
bool operator()(const BlockEntry * lhs, const BlockEntry * rhs) const {
|
||||||
|
return *lhs == *rhs;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/// An entry in the function table
|
||||||
|
struct FunctionEntry {
|
||||||
|
/// Address of the function
|
||||||
|
u32 address;
|
||||||
|
|
||||||
|
/// Number of compiled fragments
|
||||||
|
u32 num_compiled_fragments;
|
||||||
|
|
||||||
|
/// Blocks in the function
|
||||||
|
std::list<BlockEntry *> blocks;
|
||||||
|
|
||||||
|
FunctionEntry(u32 address)
|
||||||
|
: address(address)
|
||||||
|
, num_compiled_fragments(0) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void AddBlock(BlockEntry * block_entry) {
|
||||||
|
auto i = std::find(blocks.begin(), blocks.end(), block_entry);
|
||||||
|
if (i == blocks.end()) {
|
||||||
|
if (block_entry->IsFunction()) {
|
||||||
|
// The first block must be the starting block of the function
|
||||||
|
blocks.push_front(block_entry);
|
||||||
|
} else {
|
||||||
|
blocks.push_back(block_entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string ToString() const {
|
||||||
|
return fmt::Format("0x%08X: NumCompiledFragments=%u, NumBlocks=%u", address, num_compiled_fragments, blocks.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator == (const FunctionEntry & other) const {
|
||||||
|
return address == other.address;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct hash {
|
||||||
|
size_t operator()(const FunctionEntry * f) const {
|
||||||
|
return f->address;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct equal_to {
|
||||||
|
bool operator()(const FunctionEntry * lhs, const FunctionEntry * rhs) const {
|
||||||
|
return *lhs == *rhs;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
/// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue.
|
/// Lock for accessing m_pending_execution_traces. TODO: Eliminate this and use a lock-free queue.
|
||||||
std::mutex m_pending_execution_traces_lock;
|
std::mutex m_pending_execution_traces_lock;
|
||||||
|
|
||||||
|
@ -1017,8 +1064,8 @@ namespace ppu_recompiler_llvm {
|
||||||
/// Block table
|
/// Block table
|
||||||
std::unordered_set<BlockEntry *, BlockEntry::hash, BlockEntry::equal_to> m_block_table;
|
std::unordered_set<BlockEntry *, BlockEntry::hash, BlockEntry::equal_to> m_block_table;
|
||||||
|
|
||||||
/// Maps a function to the set of all blocks in the function. Key is the address of the function.
|
/// Function table
|
||||||
std::unordered_map<u32, std::vector<BlockEntry *>> m_function_to_blocks;
|
std::unordered_set<FunctionEntry *, FunctionEntry::hash, FunctionEntry::equal_to> m_function_table;
|
||||||
|
|
||||||
/// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes.
|
/// Execution traces that have been already encountered. Data is the list of all blocks that this trace includes.
|
||||||
std::unordered_map<ExecutionTrace::Id, std::vector<BlockEntry *>> m_processed_execution_traces;
|
std::unordered_map<ExecutionTrace::Id, std::vector<BlockEntry *>> m_processed_execution_traces;
|
||||||
|
@ -1057,7 +1104,7 @@ namespace ppu_recompiler_llvm {
|
||||||
void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry);
|
void UpdateControlFlowGraph(ControlFlowGraph & cfg, const ExecutionTraceEntry & this_entry, const ExecutionTraceEntry * next_entry);
|
||||||
|
|
||||||
/// Compile a block
|
/// Compile a block
|
||||||
void CompileBlock(BlockEntry & block_entry);
|
void CompileBlock(FunctionEntry & function_entry, BlockEntry & block_entry);
|
||||||
|
|
||||||
/// Mutex used to prevent multiple creation
|
/// Mutex used to prevent multiple creation
|
||||||
static std::mutex s_mutex;
|
static std::mutex s_mutex;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue